Mirror of https://github.com/invoke-ai/InvokeAI.git (synced 2026-01-15 16:18:06 -05:00)

Compare commits: release-1. ... release-1.

11 Commits
| Author | SHA1 | Date |
|---|---|---|
| | a6d6bafd13 | |
| | 9d1343dce3 | |
| | 11c0df07b7 | |
| | ca8a799373 | |
| | 710b908290 | |
| | c80ce4fff5 | |
| | bc7b1fdd37 | |
| | 1b7d414784 | |
| | e4eb775b63 | |
| | a3632f5b4f | |
| | 2736d7e15e | |
README.md (42 changed lines)
@@ -57,16 +57,16 @@ dream> q
 00011.png: "there's a fly in my soup" -n6 -g -S 2685670268
 ~~~~
 
-The dream> prompt's arguments are pretty much
-identical to those used in the Discord bot, except you don't need to
-type "!dream" (it doesn't hurt if you do). A significant change is that creation of individual images
-is now the default
-unless --grid (-g) is given. For backward compatibility, the -i switch is recognized.
-For command-line help type -h (or --help) at the dream> prompt.
+The dream> prompt's arguments are pretty much identical to those used
+in the Discord bot, except you don't need to type "!dream" (it doesn't
+hurt if you do). A significant change is that creation of individual
+images is now the default unless --grid (-g) is given. For backward
+compatibility, the -i switch is recognized. For command-line help
+type -h (or --help) at the dream> prompt.
 
-The script itself also recognizes a series of command-line switches that will change
-important global defaults, such as the directory for image outputs and the location
-of the model weight files.
+The script itself also recognizes a series of command-line switches
+that will change important global defaults, such as the directory for
+image outputs and the location of the model weight files.
 
 ## Image-to-Image
 
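For reference, the 00011.png log line shown in the hunk above corresponds to entering the following at the prompt (reconstructed from that log line, not shown in the diff):

~~~~
dream> there's a fly in my soup -n6 -g -S 2685670268
~~~~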
@@ -84,8 +84,30 @@ The --init_img (-I) option gives the path to the seed picture. --strength (-f) c
 the original will be modified, ranging from 0.0 (keep the original intact), to 1.0 (ignore the original
 completely). The default is 0.75, and ranges from 0.25-0.75 give interesting results.
 
+## Weighted Prompts
+
+You may weight different sections of the prompt to tell the sampler to attach different levels of
+priority to them, by adding :(number) to the end of the section you wish to up- or downweight.
+For example consider this prompt:
+
+~~~~
+tabby cat:0.25 white duck:0.75 hybrid
+~~~~
+
+This will tell the sampler to invest 25% of its effort on the tabby
+cat aspect of the image and 75% on the white duck aspect
+(surprisingly, this example actually works). The prompt weights can
+use any combination of integers and floating point numbers, and they
+do not need to add up to 1. A practical example of using this type of
+weighting is described here:
+https://www.reddit.com/r/StableDiffusion/comments/wvb7q7/using_prompt_weights_to_tweak_an_image_with/
+
 ## Changes
 
+* v1.06 (23 August 2022)
+   * Added weighted prompt support contributed by [xraxra](https://github.com/xraxra)
+   * Example of using weighted prompts to tweak a demonic figure contributed by [bmaltais](https://github.com/bmaltais)
+
 * v1.05 (22 August 2022 - after the drop)
    * Filenames now use the following formats:
        000010.95183149.png -- Two files produced by the same command (e.g. -n2),
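As a quick worked example of the normalization rule described above (weights need not sum to 1; unless normalization is skipped, each weight is divided by the total):

~~~~
weights = [2.0, 6.0]                       # any positive numbers
total = sum(weights)                       # 8.0
normalized = [w / total for w in weights]  # [0.25, 0.75] -- same split as tabby cat:0.25 white duck:0.75
~~~~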
@@ -301,7 +323,7 @@ lets you create images from a prompt in just three lines of code:
 ~~~~
 from ldm.simplet2i import T2I
 model = T2I()
-outputs = model.text2image("a unicorn in manhattan")
+outputs = model.txt2img("a unicorn in manhattan")
 ~~~~
 
 Outputs is a list of lists in the format [[filename1,seed1],[filename2,seed2]...]
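Because outputs is a list of [filename, seed] pairs, callers can walk the results directly; a small sketch continuing the three-line example above:

~~~~
for filename, seed in outputs:
    print(f'{filename} (seed {seed})')
~~~~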
ldm/simplet2i.py (270 changed lines)
@@ -104,7 +104,7 @@ The vast majority of these arguments default to reasonable values.
         seed=None,
         cfg_scale=7.5,
         weights="models/ldm/stable-diffusion-v1/model.ckpt",
-        config = "configs/latent-diffusion/txt2img-1p4B-eval.yaml",
+        config = "configs/stable-diffusion/v1-inference.yaml",
         sampler_name="klms",
         latent_channels=4,
         downsampling_factor=8,
@@ -143,7 +143,7 @@ The vast majority of these arguments default to reasonable values.
 
     def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
-                cfg_scale=None,ddim_eta=None,strength=None,init_img=None):
+                cfg_scale=None,ddim_eta=None,strength=None,init_img=None,skip_normalize=False):
         """
         Generate an image from the prompt, writing iteration images into the outdir
         The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
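With the new skip_normalize keyword, a caller can pass raw subprompt weights through unchanged; a usage sketch, assuming a loaded T2I instance named model:

~~~~
outputs = model.txt2img("tabby cat:2 white duck:6 hybrid", skip_normalize=True)
~~~~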
@@ -186,65 +186,89 @@ The vast majority of these arguments default to reasonable values.
         images = list()
         seeds  = list()
         filename = None
+        image_count = 0
         tic    = time.time()
 
-        with torch.no_grad():
-            with precision_scope("cuda"):
-                with model.ema_scope():
-                    all_samples = list()
-                    for n in trange(iterations, desc="Sampling"):
-                        seed_everything(seed)
-                        for prompts in tqdm(data, desc="data", dynamic_ncols=True):
-                            uc = None
-                            if cfg_scale != 1.0:
-                                uc = model.get_learned_conditioning(batch_size * [""])
-                            if isinstance(prompts, tuple):
-                                prompts = list(prompts)
-                            c = model.get_learned_conditioning(prompts)
-                            shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor]
-                            samples_ddim, _ = sampler.sample(S=steps,
-                                                             conditioning=c,
-                                                             batch_size=batch_size,
-                                                             shape=shape,
-                                                             verbose=False,
-                                                             unconditional_guidance_scale=cfg_scale,
-                                                             unconditional_conditioning=uc,
-                                                             eta=ddim_eta,
-                                                             x_T=start_code)
-
-                            x_samples_ddim = model.decode_first_stage(samples_ddim)
-                            x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+        # Gawd. Too many levels of indent here. Need to refactor into smaller routines!
+        try:
+            with torch.no_grad():
+                with precision_scope("cuda"):
+                    with model.ema_scope():
+                        all_samples = list()
+                        for n in trange(iterations, desc="Sampling"):
+                            seed_everything(seed)
+                            for prompts in tqdm(data, desc="data", dynamic_ncols=True):
+                                uc = None
+                                if cfg_scale != 1.0:
+                                    uc = model.get_learned_conditioning(batch_size * [""])
+                                if isinstance(prompts, tuple):
+                                    prompts = list(prompts)
 
-                            if not grid:
-                                for x_sample in x_samples_ddim:
-                                    x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
-                                    filename = self._unique_filename(outdir,previousname=filename,
-                                                                     seed=seed,isbatch=(batch_size>1))
-                                    assert not os.path.exists(filename)
-                                    Image.fromarray(x_sample.astype(np.uint8)).save(filename)
-                                    images.append([filename,seed])
-                            else:
-                                all_samples.append(x_samples_ddim)
-                                seeds.append(seed)
+                                # weighted sub-prompts
+                                subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
+                                if len(subprompts) > 1:
+                                    # i dont know if this is correct.. but it works
+                                    c = torch.zeros_like(uc)
+                                    # get total weight for normalizing
+                                    totalWeight = sum(weights)
+                                    # normalize each "sub prompt" and add it
+                                    for i in range(0,len(subprompts)):
+                                        weight = weights[i]
+                                        if not skip_normalize:
+                                            weight = weight / totalWeight
+                                        c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
+                                else: # just standard 1 prompt
+                                    c = model.get_learned_conditioning(prompts)
 
-                            seed = self._new_seed()
-
-        if grid:
-            images = self._make_grid(samples=all_samples,
-                                     seeds=seeds,
-                                     batch_size=batch_size,
-                                     iterations=iterations,
-                                     outdir=outdir)
+                                shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor]
+                                samples_ddim, _ = sampler.sample(S=steps,
+                                                                 conditioning=c,
+                                                                 batch_size=batch_size,
+                                                                 shape=shape,
+                                                                 verbose=False,
+                                                                 unconditional_guidance_scale=cfg_scale,
+                                                                 unconditional_conditioning=uc,
+                                                                 eta=ddim_eta,
+                                                                 x_T=start_code)
+
+                                x_samples_ddim = model.decode_first_stage(samples_ddim)
+                                x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+
+                                if not grid:
+                                    for x_sample in x_samples_ddim:
+                                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                                        filename = self._unique_filename(outdir,previousname=filename,
+                                                                         seed=seed,isbatch=(batch_size>1))
+                                        assert not os.path.exists(filename)
+                                        Image.fromarray(x_sample.astype(np.uint8)).save(filename)
+                                        images.append([filename,seed])
+                                else:
+                                    all_samples.append(x_samples_ddim)
+                                    seeds.append(seed)
+
+                                image_count += 1
+                                seed = self._new_seed()
+            if grid:
+                images = self._make_grid(samples=all_samples,
+                                         seeds=seeds,
+                                         batch_size=batch_size,
+                                         iterations=iterations,
+                                         outdir=outdir)
+        except KeyboardInterrupt:
+            print('*interrupted*')
+            print('Partial results will be returned; if --grid was requested, nothing will be returned.')
+        except RuntimeError as e:
+            print(str(e))
 
         toc = time.time()
-        print(f'{batch_size * iterations} images generated in',"%4.2fs"% (toc-tic))
+        print(f'{image_count} images generated in',"%4.2fs"% (toc-tic))
 
         return images
 
     # There is lots of shared code between this and txt2img and should be refactored.
     def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
-                cfg_scale=None,ddim_eta=None,strength=None):
+                cfg_scale=None,ddim_eta=None,strength=None,skip_normalize=False):
         """
         Generate an image from the prompt and the initial image, writing iteration images into the outdir
         The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
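The weighted-subprompt branch added above reduces to a normalized weighted sum of the per-subprompt conditioning tensors. A standalone sketch of just that step, with plain tensors standing in for the outputs of model.get_learned_conditioning:

~~~~
import torch

def blend(embeddings, weights, skip_normalize=False):
    # weighted sum of sub-prompt embeddings, mirroring the loop above
    total = sum(weights)
    c = torch.zeros_like(embeddings[0])
    for emb, w in zip(embeddings, weights):
        if not skip_normalize:
            w = w / total
        c = torch.add(c, emb, alpha=w)
    return c

e1, e2 = torch.ones(4), torch.full((4,), 2.0)
print(blend([e1, e2], [0.25, 0.75]))  # tensor([1.7500, 1.7500, 1.7500, 1.7500])
~~~~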
@@ -305,54 +329,77 @@ The vast majority of these arguments default to reasonable values.
         images = list()
         seeds  = list()
         filename = None
 
+        image_count = 0 # actual number of iterations performed
         tic    = time.time()
 
-        with torch.no_grad():
-            with precision_scope("cuda"):
-                with model.ema_scope():
-                    all_samples = list()
-                    for n in trange(iterations, desc="Sampling"):
-                        seed_everything(seed)
-                        for prompts in tqdm(data, desc="data", dynamic_ncols=True):
-                            uc = None
-                            if cfg_scale != 1.0:
-                                uc = model.get_learned_conditioning(batch_size * [""])
-                            if isinstance(prompts, tuple):
-                                prompts = list(prompts)
-                            c = model.get_learned_conditioning(prompts)
-
-                            # encode (scaled latent)
-                            z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device))
-                            # decode it
-                            samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale,
-                                                     unconditional_conditioning=uc,)
+        # Gawd. Too many levels of indent here. Need to refactor into smaller routines!
+        try:
+            with torch.no_grad():
+                with precision_scope("cuda"):
+                    with model.ema_scope():
+                        all_samples = list()
+                        for n in trange(iterations, desc="Sampling"):
+                            seed_everything(seed)
+                            for prompts in tqdm(data, desc="data", dynamic_ncols=True):
+                                uc = None
+                                if cfg_scale != 1.0:
+                                    uc = model.get_learned_conditioning(batch_size * [""])
+                                if isinstance(prompts, tuple):
+                                    prompts = list(prompts)
 
-                            x_samples = model.decode_first_stage(samples)
-                            x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
+                                # weighted sub-prompts
+                                subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
+                                if len(subprompts) > 1:
+                                    # i dont know if this is correct.. but it works
+                                    c = torch.zeros_like(uc)
+                                    # get total weight for normalizing
+                                    totalWeight = sum(weights)
+                                    # normalize each "sub prompt" and add it
+                                    for i in range(0,len(subprompts)):
+                                        weight = weights[i]
+                                        if not skip_normalize:
+                                            weight = weight / totalWeight
+                                        c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
+                                else: # just standard 1 prompt
+                                    c = model.get_learned_conditioning(prompts)
 
-                            if not grid:
-                                for x_sample in x_samples:
-                                    x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
-                                    filename = self._unique_filename(outdir,filename,seed=seed,isbatch=(batch_size>1))
-                                    assert not os.path.exists(filename)
-                                    Image.fromarray(x_sample.astype(np.uint8)).save(filename)
-                                    images.append([filename,seed])
-                            else:
-                                all_samples.append(x_samples)
-                                seeds.append(seed)
+                                # encode (scaled latent)
+                                z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device))
+                                # decode it
+                                samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale,
+                                                         unconditional_conditioning=uc,)
 
-                            seed = self._new_seed()
+                                x_samples = model.decode_first_stage(samples)
+                                x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
 
-        if grid:
-            images = self._make_grid(samples=all_samples,
-                                     seeds=seeds,
-                                     batch_size=batch_size,
-                                     iterations=iterations,
-                                     outdir=outdir)
+                                if not grid:
+                                    for x_sample in x_samples:
+                                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                                        filename = self._unique_filename(outdir,previousname=filename,
+                                                                         seed=seed,isbatch=(batch_size>1))
+                                        assert not os.path.exists(filename)
+                                        Image.fromarray(x_sample.astype(np.uint8)).save(filename)
+                                        images.append([filename,seed])
+                                else:
+                                    all_samples.append(x_samples)
+                                    seeds.append(seed)
+                                image_count +=1
+                                seed = self._new_seed()
+            if grid:
+                images = self._make_grid(samples=all_samples,
+                                         seeds=seeds,
+                                         batch_size=batch_size,
+                                         iterations=iterations,
+                                         outdir=outdir)
 
+        except KeyboardInterrupt:
+            print('*interrupted*')
+            print('Partial results will be returned; if --grid was requested, nothing will be returned.')
+        except RuntimeError as e:
+            print(str(e))
 
         toc = time.time()
-        print(f'{batch_size * iterations} images generated in',"%4.2fs"% (toc-tic))
+        print(f'{image_count} images generated in',"%4.2fs"% (toc-tic))
 
         return images
@@ -467,3 +514,48 @@ The vast majority of these arguments default to reasonable values.
             filename = f'{basecount:06}.{seed}.{series:02}.png'
             finished = not os.path.exists(os.path.join(outdir,filename))
         return os.path.join(outdir,filename)
+
+    def _split_weighted_subprompts(text):
+        """
+        grabs all text up to the first occurrence of ':'
+        uses the grabbed text as a sub-prompt, and takes the value following ':' as weight
+        if ':' has no value defined, defaults to 1.0
+        repeats until no text remaining
+        """
+        remaining = len(text)
+        prompts = []
+        weights = []
+        while remaining > 0:
+            if ":" in text:
+                idx = text.index(":") # first occurrence from start
+                # grab up to index as sub-prompt
+                prompt = text[:idx]
+                remaining -= idx
+                # remove from main text
+                text = text[idx+1:]
+                # find value for weight
+                if " " in text:
+                    idx = text.index(" ") # first occurence
+                else: # no space, read to end
+                    idx = len(text)
+                if idx != 0:
+                    try:
+                        weight = float(text[:idx])
+                    except: # couldn't treat as float
+                        print(f"Warning: '{text[:idx]}' is not a value, are you missing a space?")
+                        weight = 1.0
+                else: # no value found
+                    weight = 1.0
+                # remove from main text
+                remaining -= idx
+                text = text[idx+1:]
+                # append the sub-prompt and its weight
+                prompts.append(prompt)
+                weights.append(weight)
+            else: # no : found
+                if len(text) > 0: # there is still text though
+                    # take remainder as weight 1
+                    prompts.append(text)
+                    weights.append(1.0)
+                remaining = 0
+        return prompts, weights
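To see what the new parser returns for the README's example prompt (expected values traced by hand from the code above, not taken from the diff):

~~~~
prompts, weights = T2I._split_weighted_subprompts("tabby cat:0.25 white duck:0.75 hybrid")
# prompts == ['tabby cat', 'white duck', 'hybrid']
# weights == [0.25, 0.75, 1.0]  ("hybrid" carries no ':' and defaults to 1.0)
~~~~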
scripts/dream.py
@@ -67,29 +67,46 @@ def main():
     # gets rid of annoying messages about random seed
     logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
+    infile = None
+    try:
+        if opt.infile is not None:
+            infile = open(opt.infile,'r')
+    except FileNotFoundError as e:
+        print(e)
+        exit(-1)
+
     # preload the model
     if not debugging:
         t2i.load_model()
     print("\n* Initialization done! Awaiting your command (-h for help, 'q' to quit, 'cd' to change output dir, 'pwd' to print output dir)...")
 
-    log_path = os.path.join(opt.outdir,'..','dream_log.txt')
+    log_path = os.path.join(opt.outdir,'dream_log.txt')
     with open(log_path,'a') as log:
         cmd_parser = create_cmd_parser()
-        main_loop(t2i,cmd_parser,log)
+        main_loop(t2i,cmd_parser,log,infile)
         log.close()
+    if infile:
+        infile.close()
 
 
-def main_loop(t2i,parser,log):
+def main_loop(t2i,parser,log,infile):
     ''' prompt/read/execute loop '''
     done = False
 
     while not done:
         try:
-            command = input("dream> ")
+            command = infile.readline() if infile else input("dream> ")
         except EOFError:
             done = True
             break
 
+        if infile and len(command)==0:
+            done = True
+            break
+
+        if command.startswith(('#','//')):
+            continue
+
         try:
             elements = shlex.split(command)
         except ValueError as e:
@@ -98,7 +115,7 @@ def main_loop(t2i,parser,log):
 
     if len(elements)==0:
         continue
 
 
     if elements[0]=='q':
         done = True
         break
@@ -141,19 +158,12 @@ def main_loop(t2i,parser,log):
             print("Try again with a prompt!")
             continue
 
-        try:
-            if opt.init_img is None:
-                results = t2i.txt2img(**vars(opt))
-            else:
-                results = t2i.img2img(**vars(opt))
-            print("Outputs:")
-            write_log_message(t2i,opt,results,log)
-        except KeyboardInterrupt:
-            print('*interrupted*')
-            continue
-        except RuntimeError as e:
-            print(str(e))
-            continue
+        if opt.init_img is None:
+            results = t2i.txt2img(**vars(opt))
+        else:
+            results = t2i.img2img(**vars(opt))
+        print("Outputs:")
+        write_log_message(t2i,opt,results,log)
 
 
     print("goodbye!")
@@ -232,6 +242,10 @@ def create_argv_parser():
                         dest='laion400m',
                         action='store_true',
                         help="fallback to the latent diffusion (laion400m) weights and config")
+    parser.add_argument("--from_file",
+                        dest='infile',
+                        type=str,
+                        help="if specified, load prompts from this file")
     parser.add_argument('-n','--iterations',
                         type=int,
                         default=1,
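Given the new --from_file flag and the comment-skipping logic added to main_loop, a prompt file might look like this (contents illustrative; each line is an ordinary dream> command):

~~~~
# lines starting with '#' or '//' are skipped
a watercolor fox -n2 -S 1234
// alternate comment style, also skipped
an astronaut riding a horse -g
~~~~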
@@ -271,6 +285,7 @@ def create_cmd_parser():
     parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
     parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)")
     parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
+    parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")
     return parser
 
 if readline_available: