mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2026-04-23 03:00:31 -04:00
Run python black
This commit is contained in:
@@ -47,6 +47,7 @@ from .diffusion import (
|
||||
)
|
||||
from .offloading import FullyLoadedModelGroup, ModelGroup
|
||||
|
||||
|
||||
@dataclass
|
||||
class PipelineIntermediateState:
|
||||
run_id: str
|
||||
@@ -72,7 +73,11 @@ class AddsMaskLatents:
|
||||
initial_image_latents: torch.Tensor
|
||||
|
||||
def __call__(
|
||||
self, latents: torch.Tensor, t: torch.Tensor, text_embeddings: torch.Tensor, **kwargs,
|
||||
self,
|
||||
latents: torch.Tensor,
|
||||
t: torch.Tensor,
|
||||
text_embeddings: torch.Tensor,
|
||||
**kwargs,
|
||||
) -> torch.Tensor:
|
||||
model_input = self.add_mask_channels(latents)
|
||||
return self.forward(model_input, t, text_embeddings, **kwargs)
|
||||
@@ -80,12 +85,8 @@ class AddsMaskLatents:
|
||||
def add_mask_channels(self, latents):
|
||||
batch_size = latents.size(0)
|
||||
# duplicate mask and latents for each batch
|
||||
mask = einops.repeat(
|
||||
self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size
|
||||
)
|
||||
image_latents = einops.repeat(
|
||||
self.initial_image_latents, "b c h w -> (repeat b) c h w", repeat=batch_size
|
||||
)
|
||||
mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
|
||||
image_latents = einops.repeat(self.initial_image_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
|
||||
# add mask and image as additional channels
|
||||
model_input, _ = einops.pack([latents, mask, image_latents], "b * h w")
|
||||
return model_input
|
||||
@@ -103,9 +104,7 @@ class AddsMaskGuidance:
|
||||
noise: torch.Tensor
|
||||
_debug: Optional[Callable] = None
|
||||
|
||||
def __call__(
|
||||
self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning
|
||||
) -> BaseOutput:
|
||||
def __call__(self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning) -> BaseOutput:
|
||||
output_class = step_output.__class__ # We'll create a new one with masked data.
|
||||
|
||||
# The problem with taking SchedulerOutput instead of the model output is that we're less certain what's in it.
|
||||
@@ -116,11 +115,7 @@ class AddsMaskGuidance:
|
||||
# Mask anything that has the same shape as prev_sample, return others as-is.
|
||||
return output_class(
|
||||
{
|
||||
k: (
|
||||
self.apply_mask(v, self._t_for_field(k, t))
|
||||
if are_like_tensors(prev_sample, v)
|
||||
else v
|
||||
)
|
||||
k: (self.apply_mask(v, self._t_for_field(k, t)) if are_like_tensors(prev_sample, v) else v)
|
||||
for k, v in step_output.items()
|
||||
}
|
||||
)
|
||||
@@ -132,9 +127,7 @@ class AddsMaskGuidance:
|
||||
|
||||
def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor:
|
||||
batch_size = latents.size(0)
|
||||
mask = einops.repeat(
|
||||
self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size
|
||||
)
|
||||
mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
|
||||
if t.dim() == 0:
|
||||
# some schedulers expect t to be one-dimensional.
|
||||
# TODO: file diffusers bug about inconsistency?
|
||||
@@ -144,12 +137,8 @@ class AddsMaskGuidance:
|
||||
mask_latents = self.scheduler.add_noise(self.mask_latents, self.noise, t)
|
||||
# TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
|
||||
# mask_latents = self.scheduler.scale_model_input(mask_latents, t)
|
||||
mask_latents = einops.repeat(
|
||||
mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size
|
||||
)
|
||||
masked_input = torch.lerp(
|
||||
mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)
|
||||
)
|
||||
mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
|
||||
masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype))
|
||||
if self._debug:
|
||||
self._debug(masked_input, f"t={t} lerped")
|
||||
return masked_input
|
||||
@@ -159,9 +148,7 @@ def trim_to_multiple_of(*args, multiple_of=8):
|
||||
return tuple((x - x % multiple_of) for x in args)
|
||||
|
||||
|
||||
def image_resized_to_grid_as_tensor(
|
||||
image: PIL.Image.Image, normalize: bool = True, multiple_of=8
|
||||
) -> torch.FloatTensor:
|
||||
def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool = True, multiple_of=8) -> torch.FloatTensor:
|
||||
"""
|
||||
|
||||
:param image: input image
|
||||
@@ -211,6 +198,7 @@ class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]):
|
||||
raise AssertionError("why was that an empty generator?")
|
||||
return result
|
||||
|
||||
|
||||
@dataclass
|
||||
class ControlNetData:
|
||||
model: ControlNetModel = Field(default=None)
|
||||
@@ -341,9 +329,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
# FIXME: can't currently register control module
|
||||
# control_model=control_model,
|
||||
)
|
||||
self.invokeai_diffuser = InvokeAIDiffuserComponent(
|
||||
self.unet, self._unet_forward
|
||||
)
|
||||
self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
|
||||
|
||||
self._model_group = FullyLoadedModelGroup(execution_device or self.unet.device)
|
||||
self._model_group.install(*self._submodels)
|
||||
@@ -354,11 +340,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
if xformers is available, use it, otherwise use sliced attention.
|
||||
"""
|
||||
config = InvokeAIAppConfig.get_config()
|
||||
if (
|
||||
torch.cuda.is_available()
|
||||
and is_xformers_available()
|
||||
and not config.disable_xformers
|
||||
):
|
||||
if torch.cuda.is_available() and is_xformers_available() and not config.disable_xformers:
|
||||
self.enable_xformers_memory_efficient_attention()
|
||||
else:
|
||||
if self.device.type == "cpu" or self.device.type == "mps":
|
||||
@@ -369,9 +351,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
raise ValueError(f"unrecognized device {self.device}")
|
||||
# input tensor of [1, 4, h/8, w/8]
|
||||
# output tensor of [16, (h/8 * w/8), (h/8 * w/8)]
|
||||
bytes_per_element_needed_for_baddbmm_duplication = (
|
||||
latents.element_size() + 4
|
||||
)
|
||||
bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
|
||||
max_size_required_for_baddbmm = (
|
||||
16
|
||||
* latents.size(dim=2)
|
||||
@@ -380,9 +360,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
* latents.size(dim=3)
|
||||
* bytes_per_element_needed_for_baddbmm_duplication
|
||||
)
|
||||
if max_size_required_for_baddbmm > (
|
||||
mem_free * 3.0 / 4.0
|
||||
): # 3.3 / 4.0 is from old Invoke code
|
||||
if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): # 3.3 / 4.0 is from old Invoke code
|
||||
self.enable_attention_slicing(slice_size="max")
|
||||
elif torch.backends.mps.is_available():
|
||||
# diffusers recommends always enabling for mps
|
||||
@@ -470,7 +448,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
control_data: List[ControlNetData] = None,
|
||||
) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]:
|
||||
if self.scheduler.config.get("cpu_only", False):
|
||||
scheduler_device = torch.device('cpu')
|
||||
scheduler_device = torch.device("cpu")
|
||||
else:
|
||||
scheduler_device = self._model_group.device_for(self.unet)
|
||||
|
||||
@@ -488,7 +466,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
run_id=run_id,
|
||||
additional_guidance=additional_guidance,
|
||||
control_data=control_data,
|
||||
|
||||
callback=callback,
|
||||
)
|
||||
return result.latents, result.attention_map_saver
|
||||
@@ -511,9 +488,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
additional_guidance = []
|
||||
extra_conditioning_info = conditioning_data.extra
|
||||
with self.invokeai_diffuser.custom_attention_context(
|
||||
self.invokeai_diffuser.model,
|
||||
extra_conditioning_info=extra_conditioning_info,
|
||||
step_count=len(self.scheduler.timesteps),
|
||||
self.invokeai_diffuser.model,
|
||||
extra_conditioning_info=extra_conditioning_info,
|
||||
step_count=len(self.scheduler.timesteps),
|
||||
):
|
||||
yield PipelineIntermediateState(
|
||||
run_id=run_id,
|
||||
@@ -607,16 +584,15 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
# that are combined at higher level to make control_mode enum
|
||||
# soft_injection determines whether to do per-layer re-weighting adjustment (if True)
|
||||
# or default weighting (if False)
|
||||
soft_injection = (control_mode == "more_prompt" or control_mode == "more_control")
|
||||
soft_injection = control_mode == "more_prompt" or control_mode == "more_control"
|
||||
# cfg_injection = determines whether to apply ControlNet to only the conditional (if True)
|
||||
# or the default both conditional and unconditional (if False)
|
||||
cfg_injection = (control_mode == "more_control" or control_mode == "unbalanced")
|
||||
cfg_injection = control_mode == "more_control" or control_mode == "unbalanced"
|
||||
|
||||
first_control_step = math.floor(control_datum.begin_step_percent * total_step_count)
|
||||
last_control_step = math.ceil(control_datum.end_step_percent * total_step_count)
|
||||
# only apply controlnet if current step is within the controlnet's begin/end step range
|
||||
if step_index >= first_control_step and step_index <= last_control_step:
|
||||
|
||||
if cfg_injection:
|
||||
control_latent_input = unet_latent_input
|
||||
else:
|
||||
@@ -629,7 +605,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
encoder_hidden_states = conditioning_data.text_embeddings
|
||||
encoder_attention_mask = None
|
||||
else:
|
||||
encoder_hidden_states, encoder_attention_mask = self.invokeai_diffuser._concat_conditionings_for_batch(
|
||||
(
|
||||
encoder_hidden_states,
|
||||
encoder_attention_mask,
|
||||
) = self.invokeai_diffuser._concat_conditionings_for_batch(
|
||||
conditioning_data.unconditioned_embeddings,
|
||||
conditioning_data.text_embeddings,
|
||||
)
|
||||
@@ -646,9 +625,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
timestep=timestep,
|
||||
encoder_hidden_states=encoder_hidden_states,
|
||||
controlnet_cond=control_datum.image_tensor,
|
||||
conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale
|
||||
conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale
|
||||
encoder_attention_mask=encoder_attention_mask,
|
||||
guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel
|
||||
guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel
|
||||
return_dict=False,
|
||||
)
|
||||
if cfg_injection:
|
||||
@@ -678,13 +657,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
step_index=step_index,
|
||||
total_step_count=total_step_count,
|
||||
down_block_additional_residuals=down_block_res_samples, # from controlnet(s)
|
||||
mid_block_additional_residual=mid_block_res_sample, # from controlnet(s)
|
||||
mid_block_additional_residual=mid_block_res_sample, # from controlnet(s)
|
||||
)
|
||||
|
||||
# compute the previous noisy sample x_t -> x_t-1
|
||||
step_output = self.scheduler.step(
|
||||
noise_pred, timestep, latents, **conditioning_data.scheduler_args
|
||||
)
|
||||
step_output = self.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args)
|
||||
|
||||
# TODO: this additional_guidance extension point feels redundant with InvokeAIDiffusionComponent.
|
||||
# But the way things are now, scheduler runs _after_ that, so there was
|
||||
@@ -710,17 +687,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
# use of AddsMaskLatents.
|
||||
latents = AddsMaskLatents(
|
||||
self._unet_forward,
|
||||
mask=torch.ones_like(
|
||||
latents[:1, :1], device=latents.device, dtype=latents.dtype
|
||||
),
|
||||
initial_image_latents=torch.zeros_like(
|
||||
latents[:1], device=latents.device, dtype=latents.dtype
|
||||
),
|
||||
mask=torch.ones_like(latents[:1, :1], device=latents.device, dtype=latents.dtype),
|
||||
initial_image_latents=torch.zeros_like(latents[:1], device=latents.device, dtype=latents.dtype),
|
||||
).add_mask_channels(latents)
|
||||
|
||||
# First three args should be positional, not keywords, so torch hooks can see them.
|
||||
return self.unet(
|
||||
latents, t, text_embeddings, cross_attention_kwargs=cross_attention_kwargs,
|
||||
latents,
|
||||
t,
|
||||
text_embeddings,
|
||||
cross_attention_kwargs=cross_attention_kwargs,
|
||||
**kwargs,
|
||||
).sample
|
||||
|
||||
@@ -774,9 +750,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
) -> InvokeAIStableDiffusionPipelineOutput:
|
||||
timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength)
|
||||
result_latents, result_attention_maps = self.latents_from_embeddings(
|
||||
latents=initial_latents if strength < 1.0 else torch.zeros_like(
|
||||
initial_latents, device=initial_latents.device, dtype=initial_latents.dtype
|
||||
),
|
||||
latents=initial_latents
|
||||
if strength < 1.0
|
||||
else torch.zeros_like(initial_latents, device=initial_latents.device, dtype=initial_latents.dtype),
|
||||
num_inference_steps=num_inference_steps,
|
||||
conditioning_data=conditioning_data,
|
||||
timesteps=timesteps,
|
||||
@@ -797,14 +773,12 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
)
|
||||
return self.check_for_safety(output, dtype=conditioning_data.dtype)
|
||||
|
||||
def get_img2img_timesteps(
|
||||
self, num_inference_steps: int, strength: float, device=None
|
||||
) -> (torch.Tensor, int):
|
||||
def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int):
|
||||
img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components)
|
||||
assert img2img_pipeline.scheduler is self.scheduler
|
||||
|
||||
if self.scheduler.config.get("cpu_only", False):
|
||||
scheduler_device = torch.device('cpu')
|
||||
scheduler_device = torch.device("cpu")
|
||||
else:
|
||||
scheduler_device = self._model_group.device_for(self.unet)
|
||||
|
||||
@@ -849,18 +823,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
# 6. Prepare latent variables
|
||||
# can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents
|
||||
# because we have our own noise function
|
||||
init_image_latents = self.non_noised_latents_from_image(
|
||||
init_image, device=device, dtype=latents_dtype
|
||||
)
|
||||
init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype)
|
||||
if seed is not None:
|
||||
set_seed(seed)
|
||||
noise = noise_func(init_image_latents)
|
||||
|
||||
if mask.dim() == 3:
|
||||
mask = mask.unsqueeze(0)
|
||||
latent_mask = tv_resize(
|
||||
mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR
|
||||
).to(device=device, dtype=latents_dtype)
|
||||
latent_mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR).to(
|
||||
device=device, dtype=latents_dtype
|
||||
)
|
||||
|
||||
guidance: List[Callable] = []
|
||||
|
||||
@@ -868,22 +840,20 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
# You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
|
||||
# (that's why there's a mask!) but it seems to really want that blanked out.
|
||||
masked_init_image = init_image * torch.where(mask < 0.5, 1, 0)
|
||||
masked_latents = self.non_noised_latents_from_image(
|
||||
masked_init_image, device=device, dtype=latents_dtype
|
||||
)
|
||||
masked_latents = self.non_noised_latents_from_image(masked_init_image, device=device, dtype=latents_dtype)
|
||||
|
||||
# TODO: we should probably pass this in so we don't have to try/finally around setting it.
|
||||
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
|
||||
self._unet_forward, latent_mask, masked_latents
|
||||
)
|
||||
else:
|
||||
guidance.append(
|
||||
AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise)
|
||||
)
|
||||
guidance.append(AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise))
|
||||
|
||||
try:
|
||||
result_latents, result_attention_maps = self.latents_from_embeddings(
|
||||
latents=init_image_latents if strength < 1.0 else torch.zeros_like(
|
||||
latents=init_image_latents
|
||||
if strength < 1.0
|
||||
else torch.zeros_like(
|
||||
init_image_latents, device=init_image_latents.device, dtype=init_image_latents.dtype
|
||||
),
|
||||
num_inference_steps=num_inference_steps,
|
||||
@@ -914,18 +884,14 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
with torch.inference_mode():
|
||||
self._model_group.load(self.vae)
|
||||
init_latent_dist = self.vae.encode(init_image).latent_dist
|
||||
init_latents = init_latent_dist.sample().to(
|
||||
dtype=dtype
|
||||
) # FIXME: uses torch.randn. make reproducible!
|
||||
init_latents = init_latent_dist.sample().to(dtype=dtype) # FIXME: uses torch.randn. make reproducible!
|
||||
|
||||
init_latents = 0.18215 * init_latents
|
||||
return init_latents
|
||||
|
||||
def check_for_safety(self, output, dtype):
|
||||
with torch.inference_mode():
|
||||
screened_images, has_nsfw_concept = self.run_safety_checker(
|
||||
output.images, dtype=dtype
|
||||
)
|
||||
screened_images, has_nsfw_concept = self.run_safety_checker(output.images, dtype=dtype)
|
||||
screened_attention_map_saver = None
|
||||
if has_nsfw_concept is None or not has_nsfw_concept:
|
||||
screened_attention_map_saver = output.attention_map_saver
|
||||
@@ -949,9 +915,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
|
||||
def debug_latents(self, latents, msg):
|
||||
from invokeai.backend.image_util import debug_image
|
||||
|
||||
with torch.inference_mode():
|
||||
decoded = self.numpy_to_pil(self.decode_latents(latents))
|
||||
for i, img in enumerate(decoded):
|
||||
debug_image(
|
||||
img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True
|
||||
)
|
||||
debug_image(img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True)
|
||||
|
||||
Reference in New Issue
Block a user