Run python black

Brandon Rising
2023-07-28 09:46:44 -04:00
parent dc1148106d
commit 2b7b3dd4ba
148 changed files with 5543 additions and 6368 deletions

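This is a pure formatting commit: black rewrites layout without changing behavior. Two black behaviors account for most hunks below, sketched here with hypothetical names (and assuming a line length of about 120, which the width of the collapsed calls suggests): a trailing "magic" comma forces one element per line, while a bracketed construct with no trailing comma is collapsed whenever it fits the limit.

```python
# Illustrative only; `frob` is a stand-in, not a name from this codebase.
def frob(a, b, c):
    return (a, b, c)

# Magic trailing comma: black keeps one argument per line.
result = frob(
    1,
    2,
    3,
)

# No trailing comma, and the call fits the line limit: black collapses it.
result = frob(1, 2, 3)
```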

@@ -47,6 +47,7 @@ from .diffusion import (
 )
 from .offloading import FullyLoadedModelGroup, ModelGroup
 
+
 @dataclass
 class PipelineIntermediateState:
     run_id: str
@@ -72,7 +73,11 @@ class AddsMaskLatents:
     initial_image_latents: torch.Tensor
 
     def __call__(
-        self, latents: torch.Tensor, t: torch.Tensor, text_embeddings: torch.Tensor, **kwargs,
+        self,
+        latents: torch.Tensor,
+        t: torch.Tensor,
+        text_embeddings: torch.Tensor,
+        **kwargs,
     ) -> torch.Tensor:
         model_input = self.add_mask_channels(latents)
         return self.forward(model_input, t, text_embeddings, **kwargs)
@@ -80,12 +85,8 @@ class AddsMaskLatents:
     def add_mask_channels(self, latents):
         batch_size = latents.size(0)
         # duplicate mask and latents for each batch
-        mask = einops.repeat(
-            self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size
-        )
-        image_latents = einops.repeat(
-            self.initial_image_latents, "b c h w -> (repeat b) c h w", repeat=batch_size
-        )
+        mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
+        image_latents = einops.repeat(self.initial_image_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
         # add mask and image as additional channels
         model_input, _ = einops.pack([latents, mask, image_latents], "b * h w")
         return model_input
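For context on the two einops calls black just collapsed: `einops.repeat` broadcasts the single-image mask and latents across the batch, and `einops.pack` concatenates along the axis marked `*` (channels), producing the 9-channel input an inpainting UNet expects. A runnable sketch with illustrative shapes:

```python
import torch
import einops

latents = torch.randn(4, 4, 64, 64)        # (batch, channels, h, w)
mask = torch.ones(1, 1, 64, 64)            # single-image mask
image_latents = torch.randn(1, 4, 64, 64)  # single-image latents

# Duplicate mask and latents for each batch element.
mask = einops.repeat(mask, "b c h w -> (repeat b) c h w", repeat=4)
image_latents = einops.repeat(image_latents, "b c h w -> (repeat b) c h w", repeat=4)

# Concatenate along the `*` axis: 4 + 1 + 4 = 9 channels.
model_input, _ = einops.pack([latents, mask, image_latents], "b * h w")
print(model_input.shape)  # torch.Size([4, 9, 64, 64])
```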
@@ -103,9 +104,7 @@ class AddsMaskGuidance:
     noise: torch.Tensor
     _debug: Optional[Callable] = None
 
-    def __call__(
-        self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning
-    ) -> BaseOutput:
+    def __call__(self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning) -> BaseOutput:
         output_class = step_output.__class__  # We'll create a new one with masked data.
 
         # The problem with taking SchedulerOutput instead of the model output is that we're less certain what's in it.
@@ -116,11 +115,7 @@ class AddsMaskGuidance:
         # Mask anything that has the same shape as prev_sample, return others as-is.
         return output_class(
             {
-                k: (
-                    self.apply_mask(v, self._t_for_field(k, t))
-                    if are_like_tensors(prev_sample, v)
-                    else v
-                )
+                k: (self.apply_mask(v, self._t_for_field(k, t)) if are_like_tensors(prev_sample, v) else v)
                 for k, v in step_output.items()
             }
         )
@@ -132,9 +127,7 @@ class AddsMaskGuidance:
 
     def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor:
         batch_size = latents.size(0)
-        mask = einops.repeat(
-            self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size
-        )
+        mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
         if t.dim() == 0:
             # some schedulers expect t to be one-dimensional.
             # TODO: file diffusers bug about inconsistency?
@@ -144,12 +137,8 @@ class AddsMaskGuidance:
         mask_latents = self.scheduler.add_noise(self.mask_latents, self.noise, t)
         # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
         # mask_latents = self.scheduler.scale_model_input(mask_latents, t)
-        mask_latents = einops.repeat(
-            mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size
-        )
-        masked_input = torch.lerp(
-            mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)
-        )
+        mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
+        masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype))
         if self._debug:
             self._debug(masked_input, f"t={t} lerped")
         return masked_input
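The collapsed `torch.lerp` line is the heart of mask guidance: the mask acts as the interpolation weight between the noised original-image latents and the freshly denoised latents, so 0 preserves the original and 1 keeps the new content. A minimal sketch:

```python
import torch

mask_latents = torch.zeros(1, 4, 2, 2)  # stand-in for noised original-image latents
latents = torch.ones(1, 4, 2, 2)        # stand-in for the denoised prediction
mask = torch.full((1, 1, 2, 2), 0.25)   # 0 = keep original, 1 = keep new content

# lerp(a, b, w) = a + w * (b - a); the weight broadcasts across channels.
blended = torch.lerp(mask_latents, latents, mask)
print(blended[0, 0])  # all values 0.25
```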
@@ -159,9 +148,7 @@ def trim_to_multiple_of(*args, multiple_of=8):
     return tuple((x - x % multiple_of) for x in args)
 
 
-def image_resized_to_grid_as_tensor(
-    image: PIL.Image.Image, normalize: bool = True, multiple_of=8
-) -> torch.FloatTensor:
+def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool = True, multiple_of=8) -> torch.FloatTensor:
     """
 
     :param image: input image
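The context line above shows the whole `trim_to_multiple_of` helper: it rounds each dimension down to the nearest multiple of `multiple_of` (8 by default, matching the 8x spatial downscaling between image space and latent space). For example:

```python
def trim_to_multiple_of(*args, multiple_of=8):
    return tuple((x - x % multiple_of) for x in args)

print(trim_to_multiple_of(513, 769))  # (512, 768)
```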
@@ -211,6 +198,7 @@ class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]):
             raise AssertionError("why was that an empty generator?")
         return result
 
+
 @dataclass
 class ControlNetData:
     model: ControlNetModel = Field(default=None)
@@ -341,9 +329,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             # FIXME: can't currently register control module
             # control_model=control_model,
         )
-        self.invokeai_diffuser = InvokeAIDiffuserComponent(
-            self.unet, self._unet_forward
-        )
+        self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
 
         self._model_group = FullyLoadedModelGroup(execution_device or self.unet.device)
         self._model_group.install(*self._submodels)
@@ -354,11 +340,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         if xformers is available, use it, otherwise use sliced attention.
         """
         config = InvokeAIAppConfig.get_config()
-        if (
-            torch.cuda.is_available()
-            and is_xformers_available()
-            and not config.disable_xformers
-        ):
+        if torch.cuda.is_available() and is_xformers_available() and not config.disable_xformers:
             self.enable_xformers_memory_efficient_attention()
         else:
             if self.device.type == "cpu" or self.device.type == "mps":
@@ -369,9 +351,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 raise ValueError(f"unrecognized device {self.device}")
             # input tensor of [1, 4, h/8, w/8]
             # output tensor of [16, (h/8 * w/8), (h/8 * w/8)]
-            bytes_per_element_needed_for_baddbmm_duplication = (
-                latents.element_size() + 4
-            )
+            bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
             max_size_required_for_baddbmm = (
                 16
                 * latents.size(dim=2)
@@ -380,9 +360,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 * latents.size(dim=3)
                 * bytes_per_element_needed_for_baddbmm_duplication
             )
-            if max_size_required_for_baddbmm > (
-                mem_free * 3.0 / 4.0
-            ):  # 3.3 / 4.0 is from old Invoke code
+            if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0):  # 3.3 / 4.0 is from old Invoke code
                 self.enable_attention_slicing(slice_size="max")
             elif torch.backends.mps.is_available():
                 # diffusers recommends always enabling for mps
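The surrounding branch estimates the size of the baddbmm attention buffer, a [16, h*w, h*w] tensor at `element_size() + 4` bytes per element by the code's own accounting, and enables sliced attention when it would exceed three quarters of free memory. The arithmetic, worked for a 512x512 image in fp16:

```python
element_size = 2   # bytes per element for fp16 latents
h = w = 512 // 8   # latent spatial dims for a 512x512 image
bytes_per_element_needed_for_baddbmm_duplication = element_size + 4

max_size_required_for_baddbmm = 16 * h * w * h * w * bytes_per_element_needed_for_baddbmm_duplication
print(max_size_required_for_baddbmm / 2**30)  # 1.5 (GiB)
```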
@@ -470,7 +448,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         control_data: List[ControlNetData] = None,
     ) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]:
         if self.scheduler.config.get("cpu_only", False):
-            scheduler_device = torch.device('cpu')
+            scheduler_device = torch.device("cpu")
         else:
             scheduler_device = self._model_group.device_for(self.unet)
 
@@ -488,7 +466,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             run_id=run_id,
             additional_guidance=additional_guidance,
             control_data=control_data,
-            callback=callback,
         )
         return result.latents, result.attention_map_saver
 
@@ -511,9 +488,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             additional_guidance = []
         extra_conditioning_info = conditioning_data.extra
         with self.invokeai_diffuser.custom_attention_context(
-                self.invokeai_diffuser.model,
-                extra_conditioning_info=extra_conditioning_info,
-                step_count=len(self.scheduler.timesteps),
+            self.invokeai_diffuser.model,
+            extra_conditioning_info=extra_conditioning_info,
+            step_count=len(self.scheduler.timesteps),
         ):
             yield PipelineIntermediateState(
                 run_id=run_id,
@@ -607,16 +584,15 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 # that are combined at higher level to make control_mode enum
                 # soft_injection determines whether to do per-layer re-weighting adjustment (if True)
                 # or default weighting (if False)
-                soft_injection = (control_mode == "more_prompt" or control_mode == "more_control")
+                soft_injection = control_mode == "more_prompt" or control_mode == "more_control"
                 # cfg_injection = determines whether to apply ControlNet to only the conditional (if True)
                 # or the default both conditional and unconditional (if False)
-                cfg_injection = (control_mode == "more_control" or control_mode == "unbalanced")
+                cfg_injection = control_mode == "more_control" or control_mode == "unbalanced"
 
                 first_control_step = math.floor(control_datum.begin_step_percent * total_step_count)
                 last_control_step = math.ceil(control_datum.end_step_percent * total_step_count)
                 # only apply controlnet if current step is within the controlnet's begin/end step range
                 if step_index >= first_control_step and step_index <= last_control_step:
-
                     if cfg_injection:
                         control_latent_input = unet_latent_input
                     else:
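Stripping the redundant parentheses leaves the control_mode mapping unchanged. Restated as a standalone sketch (the function name is illustrative, not from this codebase):

```python
def control_flags(control_mode: str) -> tuple[bool, bool]:
    # soft_injection: per-layer re-weighting (diffusers calls this guess_mode).
    soft_injection = control_mode == "more_prompt" or control_mode == "more_control"
    # cfg_injection: apply ControlNet only to the conditional pass.
    cfg_injection = control_mode == "more_control" or control_mode == "unbalanced"
    return soft_injection, cfg_injection

print(control_flags("balanced"))      # (False, False)
print(control_flags("more_control"))  # (True, True)
print(control_flags("more_prompt"))   # (True, False)
print(control_flags("unbalanced"))    # (False, True)
```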
@@ -629,7 +605,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                         encoder_hidden_states = conditioning_data.text_embeddings
                         encoder_attention_mask = None
                     else:
-                        encoder_hidden_states, encoder_attention_mask = self.invokeai_diffuser._concat_conditionings_for_batch(
+                        (
+                            encoder_hidden_states,
+                            encoder_attention_mask,
+                        ) = self.invokeai_diffuser._concat_conditionings_for_batch(
                             conditioning_data.unconditioned_embeddings,
                             conditioning_data.text_embeddings,
                         )
@@ -646,9 +625,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                         timestep=timestep,
                         encoder_hidden_states=encoder_hidden_states,
                         controlnet_cond=control_datum.image_tensor,
-                        conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale
+                        conditioning_scale=controlnet_weight,  # controlnet specific, NOT the guidance scale
                         encoder_attention_mask=encoder_attention_mask,
-                        guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel
+                        guess_mode=soft_injection,  # this is still called guess_mode in diffusers ControlNetModel
                         return_dict=False,
                     )
                     if cfg_injection:
@@ -678,13 +657,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             step_index=step_index,
             total_step_count=total_step_count,
             down_block_additional_residuals=down_block_res_samples,  # from controlnet(s)
-            mid_block_additional_residual=mid_block_res_sample, # from controlnet(s)
+            mid_block_additional_residual=mid_block_res_sample,  # from controlnet(s)
         )
 
         # compute the previous noisy sample x_t -> x_t-1
-        step_output = self.scheduler.step(
-            noise_pred, timestep, latents, **conditioning_data.scheduler_args
-        )
+        step_output = self.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args)
 
         # TODO: this additional_guidance extension point feels redundant with InvokeAIDiffusionComponent.
         # But the way things are now, scheduler runs _after_ that, so there was
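The collapsed `scheduler.step` call follows the standard diffusers contract: it takes the predicted noise, the current timestep, and the current latents, and returns an output whose `prev_sample` is the latent for the next iteration. A minimal runnable sketch using `DDPMScheduler` with its default config (other diffusers schedulers behave similarly):

```python
import torch
from diffusers import DDPMScheduler

scheduler = DDPMScheduler()
scheduler.set_timesteps(10)

latents = torch.randn(1, 4, 64, 64)
noise_pred = torch.randn_like(latents)  # stand-in for the UNet's prediction

step_output = scheduler.step(noise_pred, scheduler.timesteps[0], latents)
print(step_output.prev_sample.shape)  # torch.Size([1, 4, 64, 64])
```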
@@ -710,17 +687,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             # use of AddsMaskLatents.
             latents = AddsMaskLatents(
                 self._unet_forward,
-                mask=torch.ones_like(
-                    latents[:1, :1], device=latents.device, dtype=latents.dtype
-                ),
-                initial_image_latents=torch.zeros_like(
-                    latents[:1], device=latents.device, dtype=latents.dtype
-                ),
+                mask=torch.ones_like(latents[:1, :1], device=latents.device, dtype=latents.dtype),
+                initial_image_latents=torch.zeros_like(latents[:1], device=latents.device, dtype=latents.dtype),
             ).add_mask_channels(latents)
 
         # First three args should be positional, not keywords, so torch hooks can see them.
         return self.unet(
-            latents, t, text_embeddings, cross_attention_kwargs=cross_attention_kwargs,
+            latents,
+            t,
+            text_embeddings,
+            cross_attention_kwargs=cross_attention_kwargs,
             **kwargs,
         ).sample
 
@@ -774,9 +750,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
     ) -> InvokeAIStableDiffusionPipelineOutput:
         timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength)
         result_latents, result_attention_maps = self.latents_from_embeddings(
-            latents=initial_latents if strength < 1.0 else torch.zeros_like(
-                initial_latents, device=initial_latents.device, dtype=initial_latents.dtype
-            ),
+            latents=initial_latents
+            if strength < 1.0
+            else torch.zeros_like(initial_latents, device=initial_latents.device, dtype=initial_latents.dtype),
             num_inference_steps=num_inference_steps,
             conditioning_data=conditioning_data,
             timesteps=timesteps,
@@ -797,14 +773,12 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         )
         return self.check_for_safety(output, dtype=conditioning_data.dtype)
 
-    def get_img2img_timesteps(
-        self, num_inference_steps: int, strength: float, device=None
-    ) -> (torch.Tensor, int):
+    def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int):
         img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components)
         assert img2img_pipeline.scheduler is self.scheduler
 
         if self.scheduler.config.get("cpu_only", False):
-            scheduler_device = torch.device('cpu')
+            scheduler_device = torch.device("cpu")
         else:
             scheduler_device = self._model_group.device_for(self.unet)
 
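get_img2img_timesteps delegates to the upstream StableDiffusionImg2ImgPipeline, whose timestep trimming works roughly like the simplified sketch below (details vary across diffusers versions): `strength` decides how much of the tail of the schedule to keep, so `strength=1.0` denoises from pure noise and small values start close to the original image.

```python
# Simplified sketch of diffusers-style img2img timestep selection.
def get_timesteps_sketch(timesteps, num_inference_steps: int, strength: float):
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
    t_start = max(num_inference_steps - init_timestep, 0)
    return timesteps[t_start:]

schedule = list(range(1000, 0, -100))  # 10 steps: 1000, 900, ..., 100
print(get_timesteps_sketch(schedule, 10, 0.3))  # [300, 200, 100]
print(get_timesteps_sketch(schedule, 10, 1.0))  # all 10 steps
```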
@@ -849,18 +823,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         # 6. Prepare latent variables
         # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents
         # because we have our own noise function
-        init_image_latents = self.non_noised_latents_from_image(
-            init_image, device=device, dtype=latents_dtype
-        )
+        init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype)
         if seed is not None:
             set_seed(seed)
         noise = noise_func(init_image_latents)
 
         if mask.dim() == 3:
             mask = mask.unsqueeze(0)
-        latent_mask = tv_resize(
-            mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR
-        ).to(device=device, dtype=latents_dtype)
+        latent_mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR).to(
+            device=device, dtype=latents_dtype
+        )
 
         guidance: List[Callable] = []
 
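Assuming `tv_resize` aliases `torchvision.transforms.functional.resize` (which the call signature suggests), the image-space mask is downscaled by the same factor of 8 as the latents, then moved to the latents' device and dtype. A sketch with assumed shapes:

```python
import torch
from torchvision.transforms import InterpolationMode
from torchvision.transforms.functional import resize as tv_resize

mask = torch.ones(1, 1, 512, 512)  # image-space inpainting mask
latent_hw = [64, 64]               # init_image_latents.shape[-2:] for a 512x512 image

latent_mask = tv_resize(mask, latent_hw, InterpolationMode.BILINEAR)
print(latent_mask.shape)  # torch.Size([1, 1, 64, 64])
```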
@@ -868,22 +840,20 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             # You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
             # (that's why there's a mask!) but it seems to really want that blanked out.
             masked_init_image = init_image * torch.where(mask < 0.5, 1, 0)
-            masked_latents = self.non_noised_latents_from_image(
-                masked_init_image, device=device, dtype=latents_dtype
-            )
+            masked_latents = self.non_noised_latents_from_image(masked_init_image, device=device, dtype=latents_dtype)
 
             # TODO: we should probably pass this in so we don't have to try/finally around setting it.
             self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
                 self._unet_forward, latent_mask, masked_latents
             )
         else:
-            guidance.append(
-                AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise)
-            )
+            guidance.append(AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise))
 
         try:
             result_latents, result_attention_maps = self.latents_from_embeddings(
-                latents=init_image_latents if strength < 1.0 else torch.zeros_like(
+                latents=init_image_latents
+                if strength < 1.0
+                else torch.zeros_like(
                     init_image_latents, device=init_image_latents.device, dtype=init_image_latents.dtype
                 ),
                 num_inference_steps=num_inference_steps,
@@ -914,18 +884,14 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         with torch.inference_mode():
             self._model_group.load(self.vae)
             init_latent_dist = self.vae.encode(init_image).latent_dist
-            init_latents = init_latent_dist.sample().to(
-                dtype=dtype
-            )  # FIXME: uses torch.randn. make reproducible!
+            init_latents = init_latent_dist.sample().to(dtype=dtype)  # FIXME: uses torch.randn. make reproducible!
 
             init_latents = 0.18215 * init_latents
         return init_latents
 
     def check_for_safety(self, output, dtype):
         with torch.inference_mode():
-            screened_images, has_nsfw_concept = self.run_safety_checker(
-                output.images, dtype=dtype
-            )
+            screened_images, has_nsfw_concept = self.run_safety_checker(output.images, dtype=dtype)
         screened_attention_map_saver = None
         if has_nsfw_concept is None or not has_nsfw_concept:
             screened_attention_map_saver = output.attention_map_saver
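The 0.18215 constant above is the standard Stable Diffusion 1.x VAE scaling factor (exposed as `vae.config.scaling_factor` in diffusers): latents are multiplied by it after encoding and divided by it before decoding. A sketch of the round trip (the checkpoint name is illustrative; running this downloads weights):

```python
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae")
image = torch.randn(1, 3, 512, 512)  # stand-in for a preprocessed image in [-1, 1]

with torch.inference_mode():
    latents = 0.18215 * vae.encode(image).latent_dist.sample()
    decoded = vae.decode(latents / 0.18215).sample

print(latents.shape, decoded.shape)  # torch.Size([1, 4, 64, 64]) torch.Size([1, 3, 512, 512])
```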
@@ -949,9 +915,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
     def debug_latents(self, latents, msg):
         from invokeai.backend.image_util import debug_image
 
         with torch.inference_mode():
             decoded = self.numpy_to_pil(self.decode_latents(latents))
         for i, img in enumerate(decoded):
-            debug_image(
-                img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True
-            )
+            debug_image(img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True)