mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2026-01-25 05:57:59 -05:00
Compare commits
92 Commits
psyche/mm-
...
ryan/multi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6bcf48aa37 | ||
|
|
b1bb1511fe | ||
|
|
99046a8145 | ||
|
|
72be7e71e3 | ||
|
|
35adaf1c17 | ||
|
|
865c2335de | ||
|
|
49ca42f84a | ||
|
|
493fcd8660 | ||
|
|
20322d781e | ||
|
|
889d13e02a | ||
|
|
6ccd2a867b | ||
|
|
5861fa1719 | ||
|
|
dfd4beb62b | ||
|
|
83df0c0df5 | ||
|
|
c58c4069a7 | ||
|
|
3937fffa94 | ||
|
|
bbf5f67691 | ||
|
|
2f5c147b84 | ||
|
|
bd2839b748 | ||
|
|
4f70dd7ce1 | ||
|
|
066672fbfd | ||
|
|
abefaee4d1 | ||
|
|
3254ba5904 | ||
|
|
73a8c55852 | ||
|
|
f82af7c22d | ||
|
|
3aef717ef4 | ||
|
|
c2cf1137e9 | ||
|
|
803a24bc0a | ||
|
|
7d24ad8ccd | ||
|
|
cb389063b2 | ||
|
|
81b8a69e1a | ||
|
|
7ee5db87ad | ||
|
|
66cf2c59bd | ||
|
|
3bad1367e9 | ||
|
|
867a7642a6 | ||
|
|
d9d1c8f9cb | ||
|
|
e03eb7fb45 | ||
|
|
85db33bc7e | ||
|
|
93e3a2b504 | ||
|
|
6a7a26f1bf | ||
|
|
08ca03ef9f | ||
|
|
ccf90b6bd6 | ||
|
|
753239b48d | ||
|
|
65fa4664c9 | ||
|
|
297570ded3 | ||
|
|
680fdcf293 | ||
|
|
5ff91f2c44 | ||
|
|
69aa7057e7 | ||
|
|
d3932f40de | ||
|
|
ee74cd7fab | ||
|
|
bda25b40c9 | ||
|
|
7e9a89f8c6 | ||
|
|
79ceac2f82 | ||
|
|
8e47e005a7 | ||
|
|
d13aafb514 | ||
|
|
63a7e19dbf | ||
|
|
fbc5a8ec65 | ||
|
|
8ce6e4540e | ||
|
|
f14f377ede | ||
|
|
1925f83f5e | ||
|
|
3a5ad6d112 | ||
|
|
41a6bb45f3 | ||
|
|
70e40fa6c1 | ||
|
|
e26125b734 | ||
|
|
cd70937b7f | ||
|
|
f002bca2fa | ||
|
|
56771de856 | ||
|
|
c11478a94a | ||
|
|
fb694b3e17 | ||
|
|
1bc98abc76 | ||
|
|
7f03b04b2f | ||
|
|
4029972530 | ||
|
|
328f160e88 | ||
|
|
aae318425d | ||
|
|
785bb1d9e4 | ||
|
|
a3cb5da130 | ||
|
|
568a4844f7 | ||
|
|
b1e56e2485 | ||
|
|
9432336e2b | ||
|
|
7d19af2caa | ||
|
|
0dbec3ad8b | ||
|
|
52c0c4a32f | ||
|
|
8f1afc032a | ||
|
|
854bca668a | ||
|
|
fea9013cad | ||
|
|
045caddee1 | ||
|
|
58697141bf | ||
|
|
5e419dbb56 | ||
|
|
595096bdcf | ||
|
|
ed03d281e6 | ||
|
|
0b37496c57 | ||
|
|
fde58ce0a3 |
@@ -9,7 +9,7 @@ from copy import deepcopy
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
from fastapi import Body, Path, Query, Response, UploadFile
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.responses import FileResponse, HTMLResponse
|
||||
from fastapi.routing import APIRouter
|
||||
from PIL import Image
|
||||
from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field
|
||||
@@ -502,6 +502,133 @@ async def install_model(
|
||||
return result
|
||||
|
||||
|
||||
@model_manager_router.get(
|
||||
"/install/huggingface",
|
||||
operation_id="install_hugging_face_model",
|
||||
responses={
|
||||
201: {"description": "The model is being installed"},
|
||||
400: {"description": "Bad request"},
|
||||
409: {"description": "There is already a model corresponding to this path or repo_id"},
|
||||
},
|
||||
status_code=201,
|
||||
response_class=HTMLResponse,
|
||||
)
|
||||
async def install_hugging_face_model(
|
||||
source: str = Query(description="HuggingFace repo_id to install"),
|
||||
) -> HTMLResponse:
|
||||
"""Install a Hugging Face model using a string identifier."""
|
||||
|
||||
def generate_html(title: str, heading: str, repo_id: str, is_error: bool, message: str | None = "") -> str:
|
||||
if message:
|
||||
message = f"<p>{message}</p>"
|
||||
title_class = "error" if is_error else "success"
|
||||
return f"""
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<title>{title}</title>
|
||||
<style>
|
||||
body {{
|
||||
text-align: center;
|
||||
background-color: hsl(220 12% 10% / 1);
|
||||
font-family: Helvetica, sans-serif;
|
||||
color: hsl(220 12% 86% / 1);
|
||||
}}
|
||||
|
||||
.repo-id {{
|
||||
color: hsl(220 12% 68% / 1);
|
||||
}}
|
||||
|
||||
.error {{
|
||||
color: hsl(0 42% 68% / 1)
|
||||
}}
|
||||
|
||||
.message-box {{
|
||||
display: inline-block;
|
||||
border-radius: 5px;
|
||||
background-color: hsl(220 12% 20% / 1);
|
||||
padding-inline-end: 30px;
|
||||
padding: 20px;
|
||||
padding-inline-start: 30px;
|
||||
padding-inline-end: 30px;
|
||||
}}
|
||||
|
||||
.container {{
|
||||
display: flex;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}}
|
||||
|
||||
a {{
|
||||
color: inherit
|
||||
}}
|
||||
|
||||
a:visited {{
|
||||
color: inherit
|
||||
}}
|
||||
|
||||
a:active {{
|
||||
color: inherit
|
||||
}}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body style="background-color: hsl(220 12% 10% / 1);">
|
||||
<div class="container">
|
||||
<div class="message-box">
|
||||
<h2 class="{title_class}">{heading}</h2>
|
||||
{message}
|
||||
<p class="repo-id">Repo ID: {repo_id}</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
"""
|
||||
|
||||
try:
|
||||
metadata = HuggingFaceMetadataFetch().from_id(source)
|
||||
assert isinstance(metadata, ModelMetadataWithFiles)
|
||||
except UnknownMetadataException:
|
||||
title = "Unable to Install Model"
|
||||
heading = "No HuggingFace repository found with that repo ID."
|
||||
message = "Ensure the repo ID is correct and try again."
|
||||
return HTMLResponse(content=generate_html(title, heading, source, True, message), status_code=400)
|
||||
|
||||
logger = ApiDependencies.invoker.services.logger
|
||||
|
||||
try:
|
||||
installer = ApiDependencies.invoker.services.model_manager.install
|
||||
if metadata.is_diffusers:
|
||||
installer.heuristic_import(
|
||||
source=source,
|
||||
inplace=False,
|
||||
)
|
||||
elif metadata.ckpt_urls is not None and len(metadata.ckpt_urls) == 1:
|
||||
installer.heuristic_import(
|
||||
source=str(metadata.ckpt_urls[0]),
|
||||
inplace=False,
|
||||
)
|
||||
else:
|
||||
title = "Unable to Install Model"
|
||||
heading = "This HuggingFace repo has multiple models."
|
||||
message = "Please use the Model Manager to install this model."
|
||||
return HTMLResponse(content=generate_html(title, heading, source, True, message), status_code=200)
|
||||
|
||||
title = "Model Install Started"
|
||||
heading = "Your HuggingFace model is installing now."
|
||||
message = "You can close this tab and check the Model Manager for installation progress."
|
||||
return HTMLResponse(content=generate_html(title, heading, source, False, message), status_code=201)
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
title = "Unable to Install Model"
|
||||
heading = "There was an problem installing this model."
|
||||
message = 'Please use the Model Manager directly to install this model. If the issue persists, ask for help on <a href="https://discord.gg/ZmtBAhwWhy">discord</a>.'
|
||||
return HTMLResponse(content=generate_html(title, heading, source, True, message), status_code=500)
|
||||
|
||||
|
||||
@model_manager_router.get(
|
||||
"/install",
|
||||
operation_id="list_model_installs",
|
||||
|
||||
98
invokeai/app/invocations/blend_latents.py
Normal file
98
invokeai/app/invocations/blend_latents.py
Normal file
@@ -0,0 +1,98 @@
|
||||
from typing import Any, Union
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import torch
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
|
||||
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
|
||||
from invokeai.app.invocations.primitives import LatentsOutput
|
||||
from invokeai.app.services.shared.invocation_context import InvocationContext
|
||||
from invokeai.backend.util.devices import TorchDevice
|
||||
|
||||
|
||||
@invocation(
|
||||
"lblend",
|
||||
title="Blend Latents",
|
||||
tags=["latents", "blend"],
|
||||
category="latents",
|
||||
version="1.0.3",
|
||||
)
|
||||
class BlendLatentsInvocation(BaseInvocation):
|
||||
"""Blend two latents using a given alpha. Latents must have same size."""
|
||||
|
||||
latents_a: LatentsField = InputField(
|
||||
description=FieldDescriptions.latents,
|
||||
input=Input.Connection,
|
||||
)
|
||||
latents_b: LatentsField = InputField(
|
||||
description=FieldDescriptions.latents,
|
||||
input=Input.Connection,
|
||||
)
|
||||
alpha: float = InputField(default=0.5, description=FieldDescriptions.blend_alpha)
|
||||
|
||||
def invoke(self, context: InvocationContext) -> LatentsOutput:
|
||||
latents_a = context.tensors.load(self.latents_a.latents_name)
|
||||
latents_b = context.tensors.load(self.latents_b.latents_name)
|
||||
|
||||
if latents_a.shape != latents_b.shape:
|
||||
raise Exception("Latents to blend must be the same size.")
|
||||
|
||||
device = TorchDevice.choose_torch_device()
|
||||
|
||||
def slerp(
|
||||
t: Union[float, npt.NDArray[Any]], # FIXME: maybe use np.float32 here?
|
||||
v0: Union[torch.Tensor, npt.NDArray[Any]],
|
||||
v1: Union[torch.Tensor, npt.NDArray[Any]],
|
||||
DOT_THRESHOLD: float = 0.9995,
|
||||
) -> Union[torch.Tensor, npt.NDArray[Any]]:
|
||||
"""
|
||||
Spherical linear interpolation
|
||||
Args:
|
||||
t (float/np.ndarray): Float value between 0.0 and 1.0
|
||||
v0 (np.ndarray): Starting vector
|
||||
v1 (np.ndarray): Final vector
|
||||
DOT_THRESHOLD (float): Threshold for considering the two vectors as
|
||||
colineal. Not recommended to alter this.
|
||||
Returns:
|
||||
v2 (np.ndarray): Interpolation vector between v0 and v1
|
||||
"""
|
||||
inputs_are_torch = False
|
||||
if not isinstance(v0, np.ndarray):
|
||||
inputs_are_torch = True
|
||||
v0 = v0.detach().cpu().numpy()
|
||||
if not isinstance(v1, np.ndarray):
|
||||
inputs_are_torch = True
|
||||
v1 = v1.detach().cpu().numpy()
|
||||
|
||||
dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
|
||||
if np.abs(dot) > DOT_THRESHOLD:
|
||||
v2 = (1 - t) * v0 + t * v1
|
||||
else:
|
||||
theta_0 = np.arccos(dot)
|
||||
sin_theta_0 = np.sin(theta_0)
|
||||
theta_t = theta_0 * t
|
||||
sin_theta_t = np.sin(theta_t)
|
||||
s0 = np.sin(theta_0 - theta_t) / sin_theta_0
|
||||
s1 = sin_theta_t / sin_theta_0
|
||||
v2 = s0 * v0 + s1 * v1
|
||||
|
||||
if inputs_are_torch:
|
||||
v2_torch: torch.Tensor = torch.from_numpy(v2).to(device)
|
||||
return v2_torch
|
||||
else:
|
||||
assert isinstance(v2, np.ndarray)
|
||||
return v2
|
||||
|
||||
# blend
|
||||
bl = slerp(self.alpha, latents_a, latents_b)
|
||||
assert isinstance(bl, torch.Tensor)
|
||||
blended_latents: torch.Tensor = bl # for type checking convenience
|
||||
|
||||
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
|
||||
blended_latents = blended_latents.to("cpu")
|
||||
|
||||
TorchDevice.empty_cache()
|
||||
|
||||
name = context.tensors.save(tensor=blended_latents)
|
||||
return LatentsOutput.build(latents_name=name, latents=blended_latents, seed=self.latents_a.seed)
|
||||
@@ -1,6 +1,7 @@
|
||||
from typing import Literal
|
||||
|
||||
from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
|
||||
from invokeai.backend.util.devices import TorchDevice
|
||||
|
||||
LATENT_SCALE_FACTOR = 8
|
||||
"""
|
||||
@@ -15,3 +16,5 @@ SCHEDULER_NAME_VALUES = Literal[tuple(SCHEDULER_MAP.keys())]
|
||||
|
||||
IMAGE_MODES = Literal["L", "RGB", "RGBA", "CMYK", "YCbCr", "LAB", "HSV", "I", "F"]
|
||||
"""A literal type for PIL image modes supported by Invoke"""
|
||||
|
||||
DEFAULT_PRECISION = TorchDevice.choose_torch_dtype()
|
||||
|
||||
80
invokeai/app/invocations/create_denoise_mask.py
Normal file
80
invokeai/app/invocations/create_denoise_mask.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torchvision.transforms as T
|
||||
from PIL import Image
|
||||
from torchvision.transforms.functional import resize as tv_resize
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
|
||||
from invokeai.app.invocations.constants import DEFAULT_PRECISION
|
||||
from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField
|
||||
from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
|
||||
from invokeai.app.invocations.model import VAEField
|
||||
from invokeai.app.invocations.primitives import DenoiseMaskOutput
|
||||
from invokeai.app.services.shared.invocation_context import InvocationContext
|
||||
from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
|
||||
|
||||
|
||||
@invocation(
|
||||
"create_denoise_mask",
|
||||
title="Create Denoise Mask",
|
||||
tags=["mask", "denoise"],
|
||||
category="latents",
|
||||
version="1.0.2",
|
||||
)
|
||||
class CreateDenoiseMaskInvocation(BaseInvocation):
|
||||
"""Creates mask for denoising model run."""
|
||||
|
||||
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection, ui_order=0)
|
||||
image: Optional[ImageField] = InputField(default=None, description="Image which will be masked", ui_order=1)
|
||||
mask: ImageField = InputField(description="The mask to use when pasting", ui_order=2)
|
||||
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=3)
|
||||
fp32: bool = InputField(
|
||||
default=DEFAULT_PRECISION == torch.float32,
|
||||
description=FieldDescriptions.fp32,
|
||||
ui_order=4,
|
||||
)
|
||||
|
||||
def prep_mask_tensor(self, mask_image: Image.Image) -> torch.Tensor:
|
||||
if mask_image.mode != "L":
|
||||
mask_image = mask_image.convert("L")
|
||||
mask_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
|
||||
if mask_tensor.dim() == 3:
|
||||
mask_tensor = mask_tensor.unsqueeze(0)
|
||||
# if shape is not None:
|
||||
# mask_tensor = tv_resize(mask_tensor, shape, T.InterpolationMode.BILINEAR)
|
||||
return mask_tensor
|
||||
|
||||
@torch.no_grad()
|
||||
def invoke(self, context: InvocationContext) -> DenoiseMaskOutput:
|
||||
if self.image is not None:
|
||||
image = context.images.get_pil(self.image.image_name)
|
||||
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
|
||||
if image_tensor.dim() == 3:
|
||||
image_tensor = image_tensor.unsqueeze(0)
|
||||
else:
|
||||
image_tensor = None
|
||||
|
||||
mask = self.prep_mask_tensor(
|
||||
context.images.get_pil(self.mask.image_name),
|
||||
)
|
||||
|
||||
if image_tensor is not None:
|
||||
vae_info = context.models.load(self.vae.vae)
|
||||
|
||||
img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
|
||||
masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
|
||||
# TODO:
|
||||
masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone())
|
||||
|
||||
masked_latents_name = context.tensors.save(tensor=masked_latents)
|
||||
else:
|
||||
masked_latents_name = None
|
||||
|
||||
mask_name = context.tensors.save(tensor=mask)
|
||||
|
||||
return DenoiseMaskOutput.build(
|
||||
mask_name=mask_name,
|
||||
masked_latents_name=masked_latents_name,
|
||||
gradient=False,
|
||||
)
|
||||
138
invokeai/app/invocations/create_gradient_mask.py
Normal file
138
invokeai/app/invocations/create_gradient_mask.py
Normal file
@@ -0,0 +1,138 @@
|
||||
from typing import Literal, Optional
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchvision.transforms as T
|
||||
from PIL import Image, ImageFilter
|
||||
from torchvision.transforms.functional import resize as tv_resize
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
|
||||
from invokeai.app.invocations.constants import DEFAULT_PRECISION
|
||||
from invokeai.app.invocations.fields import (
|
||||
DenoiseMaskField,
|
||||
FieldDescriptions,
|
||||
ImageField,
|
||||
Input,
|
||||
InputField,
|
||||
OutputField,
|
||||
)
|
||||
from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
|
||||
from invokeai.app.invocations.model import UNetField, VAEField
|
||||
from invokeai.app.services.shared.invocation_context import InvocationContext
|
||||
from invokeai.backend.model_manager import LoadedModel
|
||||
from invokeai.backend.model_manager.config import MainConfigBase, ModelVariantType
|
||||
from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
|
||||
|
||||
|
||||
@invocation_output("gradient_mask_output")
|
||||
class GradientMaskOutput(BaseInvocationOutput):
|
||||
"""Outputs a denoise mask and an image representing the total gradient of the mask."""
|
||||
|
||||
denoise_mask: DenoiseMaskField = OutputField(description="Mask for denoise model run")
|
||||
expanded_mask_area: ImageField = OutputField(
|
||||
description="Image representing the total gradient area of the mask. For paste-back purposes."
|
||||
)
|
||||
|
||||
|
||||
@invocation(
|
||||
"create_gradient_mask",
|
||||
title="Create Gradient Mask",
|
||||
tags=["mask", "denoise"],
|
||||
category="latents",
|
||||
version="1.1.0",
|
||||
)
|
||||
class CreateGradientMaskInvocation(BaseInvocation):
|
||||
"""Creates mask for denoising model run."""
|
||||
|
||||
mask: ImageField = InputField(default=None, description="Image which will be masked", ui_order=1)
|
||||
edge_radius: int = InputField(
|
||||
default=16, ge=0, description="How far to blur/expand the edges of the mask", ui_order=2
|
||||
)
|
||||
coherence_mode: Literal["Gaussian Blur", "Box Blur", "Staged"] = InputField(default="Gaussian Blur", ui_order=3)
|
||||
minimum_denoise: float = InputField(
|
||||
default=0.0, ge=0, le=1, description="Minimum denoise level for the coherence region", ui_order=4
|
||||
)
|
||||
image: Optional[ImageField] = InputField(
|
||||
default=None,
|
||||
description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
|
||||
title="[OPTIONAL] Image",
|
||||
ui_order=6,
|
||||
)
|
||||
unet: Optional[UNetField] = InputField(
|
||||
description="OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE",
|
||||
default=None,
|
||||
input=Input.Connection,
|
||||
title="[OPTIONAL] UNet",
|
||||
ui_order=5,
|
||||
)
|
||||
vae: Optional[VAEField] = InputField(
|
||||
default=None,
|
||||
description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
|
||||
title="[OPTIONAL] VAE",
|
||||
input=Input.Connection,
|
||||
ui_order=7,
|
||||
)
|
||||
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=8)
|
||||
fp32: bool = InputField(
|
||||
default=DEFAULT_PRECISION == torch.float32,
|
||||
description=FieldDescriptions.fp32,
|
||||
ui_order=9,
|
||||
)
|
||||
|
||||
@torch.no_grad()
|
||||
def invoke(self, context: InvocationContext) -> GradientMaskOutput:
|
||||
mask_image = context.images.get_pil(self.mask.image_name, mode="L")
|
||||
if self.edge_radius > 0:
|
||||
if self.coherence_mode == "Box Blur":
|
||||
blur_mask = mask_image.filter(ImageFilter.BoxBlur(self.edge_radius))
|
||||
else: # Gaussian Blur OR Staged
|
||||
# Gaussian Blur uses standard deviation. 1/2 radius is a good approximation
|
||||
blur_mask = mask_image.filter(ImageFilter.GaussianBlur(self.edge_radius / 2))
|
||||
|
||||
blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(blur_mask, normalize=False)
|
||||
|
||||
# redistribute blur so that the original edges are 0 and blur outwards to 1
|
||||
blur_tensor = (blur_tensor - 0.5) * 2
|
||||
|
||||
threshold = 1 - self.minimum_denoise
|
||||
|
||||
if self.coherence_mode == "Staged":
|
||||
# wherever the blur_tensor is less than fully masked, convert it to threshold
|
||||
blur_tensor = torch.where((blur_tensor < 1) & (blur_tensor > 0), threshold, blur_tensor)
|
||||
else:
|
||||
# wherever the blur_tensor is above threshold but less than 1, drop it to threshold
|
||||
blur_tensor = torch.where((blur_tensor > threshold) & (blur_tensor < 1), threshold, blur_tensor)
|
||||
|
||||
else:
|
||||
blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
|
||||
|
||||
mask_name = context.tensors.save(tensor=blur_tensor.unsqueeze(1))
|
||||
|
||||
# compute a [0, 1] mask from the blur_tensor
|
||||
expanded_mask = torch.where((blur_tensor < 1), 0, 1)
|
||||
expanded_mask_image = Image.fromarray((expanded_mask.squeeze(0).numpy() * 255).astype(np.uint8), mode="L")
|
||||
expanded_image_dto = context.images.save(expanded_mask_image)
|
||||
|
||||
masked_latents_name = None
|
||||
if self.unet is not None and self.vae is not None and self.image is not None:
|
||||
# all three fields must be present at the same time
|
||||
main_model_config = context.models.get_config(self.unet.unet.key)
|
||||
assert isinstance(main_model_config, MainConfigBase)
|
||||
if main_model_config.variant is ModelVariantType.Inpaint:
|
||||
mask = blur_tensor
|
||||
vae_info: LoadedModel = context.models.load(self.vae.vae)
|
||||
image = context.images.get_pil(self.image.image_name)
|
||||
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
|
||||
if image_tensor.dim() == 3:
|
||||
image_tensor = image_tensor.unsqueeze(0)
|
||||
img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
|
||||
masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
|
||||
masked_latents = ImageToLatentsInvocation.vae_encode(
|
||||
vae_info, self.fp32, self.tiled, masked_image.clone()
|
||||
)
|
||||
masked_latents_name = context.tensors.save(tensor=masked_latents)
|
||||
|
||||
return GradientMaskOutput(
|
||||
denoise_mask=DenoiseMaskField(mask_name=mask_name, masked_latents_name=masked_latents_name, gradient=True),
|
||||
expanded_mask_area=ImageField(image_name=expanded_image_dto.image_name),
|
||||
)
|
||||
61
invokeai/app/invocations/crop_latents.py
Normal file
61
invokeai/app/invocations/crop_latents.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
|
||||
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
|
||||
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
|
||||
from invokeai.app.invocations.primitives import LatentsOutput
|
||||
from invokeai.app.services.shared.invocation_context import InvocationContext
|
||||
|
||||
|
||||
# The Crop Latents node was copied from @skunkworxdark's implementation here:
|
||||
# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80
|
||||
@invocation(
|
||||
"crop_latents",
|
||||
title="Crop Latents",
|
||||
tags=["latents", "crop"],
|
||||
category="latents",
|
||||
version="1.0.2",
|
||||
)
|
||||
# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`.
|
||||
# Currently, if the class names conflict then 'GET /openapi.json' fails.
|
||||
class CropLatentsCoreInvocation(BaseInvocation):
|
||||
"""Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be
|
||||
divisible by the latent scale factor of 8.
|
||||
"""
|
||||
|
||||
latents: LatentsField = InputField(
|
||||
description=FieldDescriptions.latents,
|
||||
input=Input.Connection,
|
||||
)
|
||||
x: int = InputField(
|
||||
ge=0,
|
||||
multiple_of=LATENT_SCALE_FACTOR,
|
||||
description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
|
||||
)
|
||||
y: int = InputField(
|
||||
ge=0,
|
||||
multiple_of=LATENT_SCALE_FACTOR,
|
||||
description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
|
||||
)
|
||||
width: int = InputField(
|
||||
ge=1,
|
||||
multiple_of=LATENT_SCALE_FACTOR,
|
||||
description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
|
||||
)
|
||||
height: int = InputField(
|
||||
ge=1,
|
||||
multiple_of=LATENT_SCALE_FACTOR,
|
||||
description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
|
||||
)
|
||||
|
||||
def invoke(self, context: InvocationContext) -> LatentsOutput:
|
||||
latents = context.tensors.load(self.latents.latents_name)
|
||||
|
||||
x1 = self.x // LATENT_SCALE_FACTOR
|
||||
y1 = self.y // LATENT_SCALE_FACTOR
|
||||
x2 = x1 + (self.width // LATENT_SCALE_FACTOR)
|
||||
y2 = y1 + (self.height // LATENT_SCALE_FACTOR)
|
||||
|
||||
cropped_latents = latents[..., y1:y2, x1:x2]
|
||||
|
||||
name = context.tensors.save(tensor=cropped_latents)
|
||||
|
||||
return LatentsOutput.build(latents_name=name, latents=cropped_latents)
|
||||
848
invokeai/app/invocations/denoise_latents.py
Normal file
848
invokeai/app/invocations/denoise_latents.py
Normal file
@@ -0,0 +1,848 @@
|
||||
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
|
||||
import inspect
|
||||
from contextlib import ExitStack
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
import torchvision
|
||||
import torchvision.transforms as T
|
||||
from diffusers.configuration_utils import ConfigMixin
|
||||
from diffusers.models.adapter import T2IAdapter
|
||||
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
|
||||
from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
|
||||
from diffusers.schedulers.scheduling_tcd import TCDScheduler
|
||||
from diffusers.schedulers.scheduling_utils import SchedulerMixin as Scheduler
|
||||
from pydantic import field_validator
|
||||
from torchvision.transforms.functional import resize as tv_resize
|
||||
from transformers import CLIPVisionModelWithProjection
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
|
||||
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
|
||||
from invokeai.app.invocations.controlnet_image_processors import ControlField
|
||||
from invokeai.app.invocations.fields import (
|
||||
ConditioningField,
|
||||
DenoiseMaskField,
|
||||
FieldDescriptions,
|
||||
Input,
|
||||
InputField,
|
||||
LatentsField,
|
||||
UIType,
|
||||
)
|
||||
from invokeai.app.invocations.ip_adapter import IPAdapterField
|
||||
from invokeai.app.invocations.model import ModelIdentifierField, UNetField
|
||||
from invokeai.app.invocations.primitives import LatentsOutput
|
||||
from invokeai.app.invocations.t2i_adapter import T2IAdapterField
|
||||
from invokeai.app.services.shared.invocation_context import InvocationContext
|
||||
from invokeai.app.util.controlnet_utils import prepare_control_image
|
||||
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
|
||||
from invokeai.backend.lora import LoRAModelRaw
|
||||
from invokeai.backend.model_manager import BaseModelType
|
||||
from invokeai.backend.model_patcher import ModelPatcher
|
||||
from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
|
||||
from invokeai.backend.stable_diffusion.diffusers_pipeline import (
|
||||
ControlNetData,
|
||||
StableDiffusionGeneratorPipeline,
|
||||
T2IAdapterData,
|
||||
)
|
||||
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
|
||||
BasicConditioningInfo,
|
||||
IPAdapterConditioningInfo,
|
||||
IPAdapterData,
|
||||
Range,
|
||||
SDXLConditioningInfo,
|
||||
TextConditioningData,
|
||||
TextConditioningRegions,
|
||||
)
|
||||
from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
|
||||
from invokeai.backend.util.devices import TorchDevice
|
||||
from invokeai.backend.util.hotfixes import ControlNetModel
|
||||
from invokeai.backend.util.mask import to_standard_float_mask
|
||||
from invokeai.backend.util.silence_warnings import SilenceWarnings
|
||||
|
||||
|
||||
def get_scheduler(
|
||||
context: InvocationContext,
|
||||
scheduler_info: ModelIdentifierField,
|
||||
scheduler_name: str,
|
||||
seed: int,
|
||||
) -> Scheduler:
|
||||
"""Load a scheduler and apply some scheduler-specific overrides."""
|
||||
# TODO(ryand): Silently falling back to ddim seems like a bad idea. Look into why this was added and remove if
|
||||
# possible.
|
||||
scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP["ddim"])
|
||||
orig_scheduler_info = context.models.load(scheduler_info)
|
||||
with orig_scheduler_info as orig_scheduler:
|
||||
scheduler_config = orig_scheduler.config
|
||||
|
||||
if "_backup" in scheduler_config:
|
||||
scheduler_config = scheduler_config["_backup"]
|
||||
scheduler_config = {
|
||||
**scheduler_config,
|
||||
**scheduler_extra_config, # FIXME
|
||||
"_backup": scheduler_config,
|
||||
}
|
||||
|
||||
# make dpmpp_sde reproducable(seed can be passed only in initializer)
|
||||
if scheduler_class is DPMSolverSDEScheduler:
|
||||
scheduler_config["noise_sampler_seed"] = seed
|
||||
|
||||
scheduler = scheduler_class.from_config(scheduler_config)
|
||||
|
||||
# hack copied over from generate.py
|
||||
if not hasattr(scheduler, "uses_inpainting_model"):
|
||||
scheduler.uses_inpainting_model = lambda: False
|
||||
assert isinstance(scheduler, Scheduler)
|
||||
return scheduler
|
||||
|
||||
|
||||
@invocation(
|
||||
"denoise_latents",
|
||||
title="Denoise Latents",
|
||||
tags=["latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
|
||||
category="latents",
|
||||
version="1.5.3",
|
||||
)
|
||||
class DenoiseLatentsInvocation(BaseInvocation):
|
||||
"""Denoises noisy latents to decodable images"""
|
||||
|
||||
positive_conditioning: Union[ConditioningField, list[ConditioningField]] = InputField(
|
||||
description=FieldDescriptions.positive_cond, input=Input.Connection, ui_order=0
|
||||
)
|
||||
negative_conditioning: Union[ConditioningField, list[ConditioningField]] = InputField(
|
||||
description=FieldDescriptions.negative_cond, input=Input.Connection, ui_order=1
|
||||
)
|
||||
noise: Optional[LatentsField] = InputField(
|
||||
default=None,
|
||||
description=FieldDescriptions.noise,
|
||||
input=Input.Connection,
|
||||
ui_order=3,
|
||||
)
|
||||
steps: int = InputField(default=10, gt=0, description=FieldDescriptions.steps)
|
||||
cfg_scale: Union[float, List[float]] = InputField(
|
||||
default=7.5, description=FieldDescriptions.cfg_scale, title="CFG Scale"
|
||||
)
|
||||
denoising_start: float = InputField(
|
||||
default=0.0,
|
||||
ge=0,
|
||||
le=1,
|
||||
description=FieldDescriptions.denoising_start,
|
||||
)
|
||||
denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
|
||||
scheduler: SCHEDULER_NAME_VALUES = InputField(
|
||||
default="euler",
|
||||
description=FieldDescriptions.scheduler,
|
||||
ui_type=UIType.Scheduler,
|
||||
)
|
||||
unet: UNetField = InputField(
|
||||
description=FieldDescriptions.unet,
|
||||
input=Input.Connection,
|
||||
title="UNet",
|
||||
ui_order=2,
|
||||
)
|
||||
control: Optional[Union[ControlField, list[ControlField]]] = InputField(
|
||||
default=None,
|
||||
input=Input.Connection,
|
||||
ui_order=5,
|
||||
)
|
||||
ip_adapter: Optional[Union[IPAdapterField, list[IPAdapterField]]] = InputField(
|
||||
description=FieldDescriptions.ip_adapter,
|
||||
title="IP-Adapter",
|
||||
default=None,
|
||||
input=Input.Connection,
|
||||
ui_order=6,
|
||||
)
|
||||
t2i_adapter: Optional[Union[T2IAdapterField, list[T2IAdapterField]]] = InputField(
|
||||
description=FieldDescriptions.t2i_adapter,
|
||||
title="T2I-Adapter",
|
||||
default=None,
|
||||
input=Input.Connection,
|
||||
ui_order=7,
|
||||
)
|
||||
cfg_rescale_multiplier: float = InputField(
|
||||
title="CFG Rescale Multiplier", default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier
|
||||
)
|
||||
latents: Optional[LatentsField] = InputField(
|
||||
default=None,
|
||||
description=FieldDescriptions.latents,
|
||||
input=Input.Connection,
|
||||
ui_order=4,
|
||||
)
|
||||
denoise_mask: Optional[DenoiseMaskField] = InputField(
|
||||
default=None,
|
||||
description=FieldDescriptions.mask,
|
||||
input=Input.Connection,
|
||||
ui_order=8,
|
||||
)
|
||||
|
||||
@field_validator("cfg_scale")
|
||||
def ge_one(cls, v: Union[List[float], float]) -> Union[List[float], float]:
|
||||
"""validate that all cfg_scale values are >= 1"""
|
||||
if isinstance(v, list):
|
||||
for i in v:
|
||||
if i < 1:
|
||||
raise ValueError("cfg_scale must be greater than 1")
|
||||
else:
|
||||
if v < 1:
|
||||
raise ValueError("cfg_scale must be greater than 1")
|
||||
return v
|
||||
|
||||
@staticmethod
|
||||
def _get_text_embeddings_and_masks(
|
||||
cond_list: list[ConditioningField],
|
||||
context: InvocationContext,
|
||||
device: torch.device,
|
||||
dtype: torch.dtype,
|
||||
) -> tuple[Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]], list[Optional[torch.Tensor]]]:
|
||||
"""Get the text embeddings and masks from the input conditioning fields."""
|
||||
text_embeddings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]] = []
|
||||
text_embeddings_masks: list[Optional[torch.Tensor]] = []
|
||||
for cond in cond_list:
|
||||
cond_data = context.conditioning.load(cond.conditioning_name)
|
||||
text_embeddings.append(cond_data.conditionings[0].to(device=device, dtype=dtype))
|
||||
|
||||
mask = cond.mask
|
||||
if mask is not None:
|
||||
mask = context.tensors.load(mask.tensor_name)
|
||||
text_embeddings_masks.append(mask)
|
||||
|
||||
return text_embeddings, text_embeddings_masks
|
||||
|
||||
@staticmethod
|
||||
def _preprocess_regional_prompt_mask(
|
||||
mask: Optional[torch.Tensor], target_height: int, target_width: int, dtype: torch.dtype
|
||||
) -> torch.Tensor:
|
||||
"""Preprocess a regional prompt mask to match the target height and width.
|
||||
If mask is None, returns a mask of all ones with the target height and width.
|
||||
If mask is not None, resizes the mask to the target height and width using 'nearest' interpolation.
|
||||
|
||||
Returns:
|
||||
torch.Tensor: The processed mask. shape: (1, 1, target_height, target_width).
|
||||
"""
|
||||
|
||||
if mask is None:
|
||||
return torch.ones((1, 1, target_height, target_width), dtype=dtype)
|
||||
|
||||
mask = to_standard_float_mask(mask, out_dtype=dtype)
|
||||
|
||||
tf = torchvision.transforms.Resize(
|
||||
(target_height, target_width), interpolation=torchvision.transforms.InterpolationMode.NEAREST
|
||||
)
|
||||
|
||||
# Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
|
||||
mask = mask.unsqueeze(0) # Shape: (1, h, w) -> (1, 1, h, w)
|
||||
resized_mask = tf(mask)
|
||||
return resized_mask
|
||||
|
||||
@staticmethod
|
||||
def _concat_regional_text_embeddings(
|
||||
text_conditionings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]],
|
||||
masks: Optional[list[Optional[torch.Tensor]]],
|
||||
latent_height: int,
|
||||
latent_width: int,
|
||||
dtype: torch.dtype,
|
||||
) -> tuple[Union[BasicConditioningInfo, SDXLConditioningInfo], Optional[TextConditioningRegions]]:
|
||||
"""Concatenate regional text embeddings into a single embedding and track the region masks accordingly."""
|
||||
if masks is None:
|
||||
masks = [None] * len(text_conditionings)
|
||||
assert len(text_conditionings) == len(masks)
|
||||
|
||||
is_sdxl = type(text_conditionings[0]) is SDXLConditioningInfo
|
||||
|
||||
all_masks_are_none = all(mask is None for mask in masks)
|
||||
|
||||
text_embedding = []
|
||||
pooled_embedding = None
|
||||
add_time_ids = None
|
||||
cur_text_embedding_len = 0
|
||||
processed_masks = []
|
||||
embedding_ranges = []
|
||||
|
||||
for prompt_idx, text_embedding_info in enumerate(text_conditionings):
|
||||
mask = masks[prompt_idx]
|
||||
|
||||
if is_sdxl:
|
||||
# We choose a random SDXLConditioningInfo's pooled_embeds and add_time_ids here, with a preference for
|
||||
# prompts without a mask. We prefer prompts without a mask, because they are more likely to contain
|
||||
# global prompt information. In an ideal case, there should be exactly one global prompt without a
|
||||
# mask, but we don't enforce this.
|
||||
|
||||
# HACK(ryand): The fact that we have to choose a single pooled_embedding and add_time_ids here is a
|
||||
# fundamental interface issue. The SDXL Compel nodes are not designed to be used in the way that we use
|
||||
# them for regional prompting. Ideally, the DenoiseLatents invocation should accept a single
|
||||
# pooled_embeds tensor and a list of standard text embeds with region masks. This change would be a
|
||||
# pretty major breaking change to a popular node, so for now we use this hack.
|
||||
if pooled_embedding is None or mask is None:
|
||||
pooled_embedding = text_embedding_info.pooled_embeds
|
||||
if add_time_ids is None or mask is None:
|
||||
add_time_ids = text_embedding_info.add_time_ids
|
||||
|
||||
text_embedding.append(text_embedding_info.embeds)
|
||||
if not all_masks_are_none:
|
||||
embedding_ranges.append(
|
||||
Range(
|
||||
start=cur_text_embedding_len, end=cur_text_embedding_len + text_embedding_info.embeds.shape[1]
|
||||
)
|
||||
)
|
||||
processed_masks.append(
|
||||
DenoiseLatentsInvocation._preprocess_regional_prompt_mask(
|
||||
mask, latent_height, latent_width, dtype=dtype
|
||||
)
|
||||
)
|
||||
|
||||
cur_text_embedding_len += text_embedding_info.embeds.shape[1]
|
||||
|
||||
text_embedding = torch.cat(text_embedding, dim=1)
|
||||
assert len(text_embedding.shape) == 3 # batch_size, seq_len, token_len
|
||||
|
||||
regions = None
|
||||
if not all_masks_are_none:
|
||||
regions = TextConditioningRegions(
|
||||
masks=torch.cat(processed_masks, dim=1),
|
||||
ranges=embedding_ranges,
|
||||
)
|
||||
|
||||
if is_sdxl:
|
||||
return (
|
||||
SDXLConditioningInfo(embeds=text_embedding, pooled_embeds=pooled_embedding, add_time_ids=add_time_ids),
|
||||
regions,
|
||||
)
|
||||
return BasicConditioningInfo(embeds=text_embedding), regions
|
||||
|
||||
@staticmethod
|
||||
def get_conditioning_data(
|
||||
context: InvocationContext,
|
||||
positive_conditioning_field: Union[ConditioningField, list[ConditioningField]],
|
||||
negative_conditioning_field: Union[ConditioningField, list[ConditioningField]],
|
||||
unet: UNet2DConditionModel,
|
||||
latent_height: int,
|
||||
latent_width: int,
|
||||
cfg_scale: float | list[float],
|
||||
steps: int,
|
||||
cfg_rescale_multiplier: float,
|
||||
) -> TextConditioningData:
|
||||
# Normalize positive_conditioning_field and negative_conditioning_field to lists.
|
||||
cond_list = positive_conditioning_field
|
||||
if not isinstance(cond_list, list):
|
||||
cond_list = [cond_list]
|
||||
uncond_list = negative_conditioning_field
|
||||
if not isinstance(uncond_list, list):
|
||||
uncond_list = [uncond_list]
|
||||
|
||||
cond_text_embeddings, cond_text_embedding_masks = DenoiseLatentsInvocation._get_text_embeddings_and_masks(
|
||||
cond_list, context, unet.device, unet.dtype
|
||||
)
|
||||
uncond_text_embeddings, uncond_text_embedding_masks = DenoiseLatentsInvocation._get_text_embeddings_and_masks(
|
||||
uncond_list, context, unet.device, unet.dtype
|
||||
)
|
||||
|
||||
cond_text_embedding, cond_regions = DenoiseLatentsInvocation._concat_regional_text_embeddings(
|
||||
text_conditionings=cond_text_embeddings,
|
||||
masks=cond_text_embedding_masks,
|
||||
latent_height=latent_height,
|
||||
latent_width=latent_width,
|
||||
dtype=unet.dtype,
|
||||
)
|
||||
uncond_text_embedding, uncond_regions = DenoiseLatentsInvocation._concat_regional_text_embeddings(
|
||||
text_conditionings=uncond_text_embeddings,
|
||||
masks=uncond_text_embedding_masks,
|
||||
latent_height=latent_height,
|
||||
latent_width=latent_width,
|
||||
dtype=unet.dtype,
|
||||
)
|
||||
|
||||
if isinstance(cfg_scale, list):
|
||||
assert len(cfg_scale) == steps, "cfg_scale (list) must have the same length as the number of steps"
|
||||
|
||||
conditioning_data = TextConditioningData(
|
||||
uncond_text=uncond_text_embedding,
|
||||
cond_text=cond_text_embedding,
|
||||
uncond_regions=uncond_regions,
|
||||
cond_regions=cond_regions,
|
||||
guidance_scale=cfg_scale,
|
||||
guidance_rescale_multiplier=cfg_rescale_multiplier,
|
||||
)
|
||||
return conditioning_data
|
||||
|
||||
@staticmethod
|
||||
def create_pipeline(
|
||||
unet: UNet2DConditionModel,
|
||||
scheduler: Scheduler,
|
||||
) -> StableDiffusionGeneratorPipeline:
|
||||
class FakeVae:
|
||||
class FakeVaeConfig:
|
||||
def __init__(self) -> None:
|
||||
self.block_out_channels = [0]
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.config = FakeVae.FakeVaeConfig()
|
||||
|
||||
return StableDiffusionGeneratorPipeline(
|
||||
vae=FakeVae(), # TODO: oh...
|
||||
text_encoder=None,
|
||||
tokenizer=None,
|
||||
unet=unet,
|
||||
scheduler=scheduler,
|
||||
safety_checker=None,
|
||||
feature_extractor=None,
|
||||
requires_safety_checker=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def prep_control_data(
|
||||
context: InvocationContext,
|
||||
control_input: ControlField | list[ControlField] | None,
|
||||
latents_shape: List[int],
|
||||
exit_stack: ExitStack,
|
||||
do_classifier_free_guidance: bool = True,
|
||||
) -> list[ControlNetData] | None:
|
||||
# Normalize control_input to a list.
|
||||
control_list: list[ControlField]
|
||||
if isinstance(control_input, ControlField):
|
||||
control_list = [control_input]
|
||||
elif isinstance(control_input, list):
|
||||
control_list = control_input
|
||||
elif control_input is None:
|
||||
control_list = []
|
||||
else:
|
||||
raise ValueError(f"Unexpected control_input type: {type(control_input)}")
|
||||
|
||||
if len(control_list) == 0:
|
||||
return None
|
||||
|
||||
# Assuming fixed dimensional scaling of LATENT_SCALE_FACTOR.
|
||||
_, _, latent_height, latent_width = latents_shape
|
||||
control_height_resize = latent_height * LATENT_SCALE_FACTOR
|
||||
control_width_resize = latent_width * LATENT_SCALE_FACTOR
|
||||
|
||||
controlnet_data: list[ControlNetData] = []
|
||||
for control_info in control_list:
|
||||
control_model = exit_stack.enter_context(context.models.load(control_info.control_model))
|
||||
assert isinstance(control_model, ControlNetModel)
|
||||
|
||||
control_image_field = control_info.image
|
||||
input_image = context.images.get_pil(control_image_field.image_name)
|
||||
# self.image.image_type, self.image.image_name
|
||||
# FIXME: still need to test with different widths, heights, devices, dtypes
|
||||
# and add in batch_size, num_images_per_prompt?
|
||||
# and do real check for classifier_free_guidance?
|
||||
# prepare_control_image should return torch.Tensor of shape(batch_size, 3, height, width)
|
||||
control_image = prepare_control_image(
|
||||
image=input_image,
|
||||
do_classifier_free_guidance=do_classifier_free_guidance,
|
||||
width=control_width_resize,
|
||||
height=control_height_resize,
|
||||
# batch_size=batch_size * num_images_per_prompt,
|
||||
# num_images_per_prompt=num_images_per_prompt,
|
||||
device=control_model.device,
|
||||
dtype=control_model.dtype,
|
||||
control_mode=control_info.control_mode,
|
||||
resize_mode=control_info.resize_mode,
|
||||
)
|
||||
control_item = ControlNetData(
|
||||
model=control_model,
|
||||
image_tensor=control_image,
|
||||
weight=control_info.control_weight,
|
||||
begin_step_percent=control_info.begin_step_percent,
|
||||
end_step_percent=control_info.end_step_percent,
|
||||
control_mode=control_info.control_mode,
|
||||
# any resizing needed should currently be happening in prepare_control_image(),
|
||||
# but adding resize_mode to ControlNetData in case needed in the future
|
||||
resize_mode=control_info.resize_mode,
|
||||
)
|
||||
controlnet_data.append(control_item)
|
||||
# MultiControlNetModel has been refactored out, just need list[ControlNetData]
|
||||
|
||||
return controlnet_data
|
||||
|
||||
def prep_ip_adapter_image_prompts(
|
||||
self,
|
||||
context: InvocationContext,
|
||||
ip_adapters: List[IPAdapterField],
|
||||
) -> List[Tuple[torch.Tensor, torch.Tensor]]:
|
||||
"""Run the IPAdapter CLIPVisionModel, returning image prompt embeddings."""
|
||||
image_prompts = []
|
||||
for single_ip_adapter in ip_adapters:
|
||||
with context.models.load(single_ip_adapter.ip_adapter_model) as ip_adapter_model:
|
||||
assert isinstance(ip_adapter_model, IPAdapter)
|
||||
image_encoder_model_info = context.models.load(single_ip_adapter.image_encoder_model)
|
||||
# `single_ip_adapter.image` could be a list or a single ImageField. Normalize to a list here.
|
||||
single_ipa_image_fields = single_ip_adapter.image
|
||||
if not isinstance(single_ipa_image_fields, list):
|
||||
single_ipa_image_fields = [single_ipa_image_fields]
|
||||
|
||||
single_ipa_images = [context.images.get_pil(image.image_name) for image in single_ipa_image_fields]
|
||||
with image_encoder_model_info as image_encoder_model:
|
||||
assert isinstance(image_encoder_model, CLIPVisionModelWithProjection)
|
||||
# Get image embeddings from CLIP and ImageProjModel.
|
||||
image_prompt_embeds, uncond_image_prompt_embeds = ip_adapter_model.get_image_embeds(
|
||||
single_ipa_images, image_encoder_model
|
||||
)
|
||||
image_prompts.append((image_prompt_embeds, uncond_image_prompt_embeds))
|
||||
|
||||
return image_prompts
|
||||
|
||||
def prep_ip_adapter_data(
|
||||
self,
|
||||
context: InvocationContext,
|
||||
ip_adapters: List[IPAdapterField],
|
||||
image_prompts: List[Tuple[torch.Tensor, torch.Tensor]],
|
||||
exit_stack: ExitStack,
|
||||
latent_height: int,
|
||||
latent_width: int,
|
||||
dtype: torch.dtype,
|
||||
) -> Optional[List[IPAdapterData]]:
|
||||
"""If IP-Adapter is enabled, then this function loads the requisite models and adds the image prompt conditioning data."""
|
||||
ip_adapter_data_list = []
|
||||
for single_ip_adapter, (image_prompt_embeds, uncond_image_prompt_embeds) in zip(
|
||||
ip_adapters, image_prompts, strict=True
|
||||
):
|
||||
ip_adapter_model = exit_stack.enter_context(context.models.load(single_ip_adapter.ip_adapter_model))
|
||||
|
||||
mask_field = single_ip_adapter.mask
|
||||
mask = context.tensors.load(mask_field.tensor_name) if mask_field is not None else None
|
||||
mask = self._preprocess_regional_prompt_mask(mask, latent_height, latent_width, dtype=dtype)
|
||||
|
||||
ip_adapter_data_list.append(
|
||||
IPAdapterData(
|
||||
ip_adapter_model=ip_adapter_model,
|
||||
weight=single_ip_adapter.weight,
|
||||
target_blocks=single_ip_adapter.target_blocks,
|
||||
begin_step_percent=single_ip_adapter.begin_step_percent,
|
||||
end_step_percent=single_ip_adapter.end_step_percent,
|
||||
ip_adapter_conditioning=IPAdapterConditioningInfo(image_prompt_embeds, uncond_image_prompt_embeds),
|
||||
mask=mask,
|
||||
)
|
||||
)
|
||||
|
||||
return ip_adapter_data_list if len(ip_adapter_data_list) > 0 else None
|
||||
|
||||
def run_t2i_adapters(
|
||||
self,
|
||||
context: InvocationContext,
|
||||
t2i_adapter: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
|
||||
latents_shape: list[int],
|
||||
do_classifier_free_guidance: bool,
|
||||
) -> Optional[list[T2IAdapterData]]:
|
||||
if t2i_adapter is None:
|
||||
return None
|
||||
|
||||
# Handle the possibility that t2i_adapter could be a list or a single T2IAdapterField.
|
||||
if isinstance(t2i_adapter, T2IAdapterField):
|
||||
t2i_adapter = [t2i_adapter]
|
||||
|
||||
if len(t2i_adapter) == 0:
|
||||
return None
|
||||
|
||||
t2i_adapter_data = []
|
||||
for t2i_adapter_field in t2i_adapter:
|
||||
t2i_adapter_model_config = context.models.get_config(t2i_adapter_field.t2i_adapter_model.key)
|
||||
t2i_adapter_loaded_model = context.models.load(t2i_adapter_field.t2i_adapter_model)
|
||||
image = context.images.get_pil(t2i_adapter_field.image.image_name)
|
||||
|
||||
# The max_unet_downscale is the maximum amount that the UNet model downscales the latent image internally.
|
||||
if t2i_adapter_model_config.base == BaseModelType.StableDiffusion1:
|
||||
max_unet_downscale = 8
|
||||
elif t2i_adapter_model_config.base == BaseModelType.StableDiffusionXL:
|
||||
max_unet_downscale = 4
|
||||
else:
|
||||
raise ValueError(f"Unexpected T2I-Adapter base model type: '{t2i_adapter_model_config.base}'.")
|
||||
|
||||
t2i_adapter_model: T2IAdapter
|
||||
with t2i_adapter_loaded_model as t2i_adapter_model:
|
||||
total_downscale_factor = t2i_adapter_model.total_downscale_factor
|
||||
|
||||
# Resize the T2I-Adapter input image.
|
||||
# We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
|
||||
# result will match the latent image's dimensions after max_unet_downscale is applied.
|
||||
t2i_input_height = latents_shape[2] // max_unet_downscale * total_downscale_factor
|
||||
t2i_input_width = latents_shape[3] // max_unet_downscale * total_downscale_factor
|
||||
|
||||
# Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
|
||||
# a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
|
||||
# T2I-Adapter model.
|
||||
#
|
||||
# Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
|
||||
# of the same requirements (e.g. preserving binary masks during resize).
|
||||
t2i_image = prepare_control_image(
|
||||
image=image,
|
||||
do_classifier_free_guidance=False,
|
||||
width=t2i_input_width,
|
||||
height=t2i_input_height,
|
||||
num_channels=t2i_adapter_model.config["in_channels"], # mypy treats this as a FrozenDict
|
||||
device=t2i_adapter_model.device,
|
||||
dtype=t2i_adapter_model.dtype,
|
||||
resize_mode=t2i_adapter_field.resize_mode,
|
||||
)
|
||||
|
||||
adapter_state = t2i_adapter_model(t2i_image)
|
||||
|
||||
if do_classifier_free_guidance:
|
||||
for idx, value in enumerate(adapter_state):
|
||||
adapter_state[idx] = torch.cat([value] * 2, dim=0)
|
||||
|
||||
t2i_adapter_data.append(
|
||||
T2IAdapterData(
|
||||
adapter_state=adapter_state,
|
||||
weight=t2i_adapter_field.weight,
|
||||
begin_step_percent=t2i_adapter_field.begin_step_percent,
|
||||
end_step_percent=t2i_adapter_field.end_step_percent,
|
||||
)
|
||||
)
|
||||
|
||||
return t2i_adapter_data
|
||||
|
||||
# original idea by https://github.com/AmericanPresidentJimmyCarter
|
||||
# TODO: research more for second order schedulers timesteps
|
||||
@staticmethod
|
||||
def init_scheduler(
|
||||
scheduler: Union[Scheduler, ConfigMixin],
|
||||
device: torch.device,
|
||||
steps: int,
|
||||
denoising_start: float,
|
||||
denoising_end: float,
|
||||
seed: int,
|
||||
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:
|
||||
assert isinstance(scheduler, ConfigMixin)
|
||||
if scheduler.config.get("cpu_only", False):
|
||||
scheduler.set_timesteps(steps, device="cpu")
|
||||
timesteps = scheduler.timesteps.to(device=device)
|
||||
else:
|
||||
scheduler.set_timesteps(steps, device=device)
|
||||
timesteps = scheduler.timesteps
|
||||
|
||||
# skip greater order timesteps
|
||||
_timesteps = timesteps[:: scheduler.order]
|
||||
|
||||
# get start timestep index
|
||||
t_start_val = int(round(scheduler.config["num_train_timesteps"] * (1 - denoising_start)))
|
||||
t_start_idx = len(list(filter(lambda ts: ts >= t_start_val, _timesteps)))
|
||||
|
||||
# get end timestep index
|
||||
t_end_val = int(round(scheduler.config["num_train_timesteps"] * (1 - denoising_end)))
|
||||
t_end_idx = len(list(filter(lambda ts: ts >= t_end_val, _timesteps[t_start_idx:])))
|
||||
|
||||
# apply order to indexes
|
||||
t_start_idx *= scheduler.order
|
||||
t_end_idx *= scheduler.order
|
||||
|
||||
init_timestep = timesteps[t_start_idx : t_start_idx + 1]
|
||||
timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
|
||||
|
||||
scheduler_step_kwargs: Dict[str, Any] = {}
|
||||
scheduler_step_signature = inspect.signature(scheduler.step)
|
||||
if "generator" in scheduler_step_signature.parameters:
|
||||
# At some point, someone decided that schedulers that accept a generator should use the original seed with
|
||||
# all bits flipped. I don't know the original rationale for this, but now we must keep it like this for
|
||||
# reproducibility.
|
||||
#
|
||||
# These Invoke-supported schedulers accept a generator as of 2024-06-04:
|
||||
# - DDIMScheduler
|
||||
# - DDPMScheduler
|
||||
# - DPMSolverMultistepScheduler
|
||||
# - EulerAncestralDiscreteScheduler
|
||||
# - EulerDiscreteScheduler
|
||||
# - KDPM2AncestralDiscreteScheduler
|
||||
# - LCMScheduler
|
||||
# - TCDScheduler
|
||||
scheduler_step_kwargs.update({"generator": torch.Generator(device=device).manual_seed(seed ^ 0xFFFFFFFF)})
|
||||
if isinstance(scheduler, TCDScheduler):
|
||||
scheduler_step_kwargs.update({"eta": 1.0})
|
||||
|
||||
return timesteps, init_timestep, scheduler_step_kwargs
|
||||
|
||||
def prep_inpaint_mask(
|
||||
self, context: InvocationContext, latents: torch.Tensor
|
||||
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], bool]:
|
||||
if self.denoise_mask is None:
|
||||
return None, None, False
|
||||
|
||||
mask = context.tensors.load(self.denoise_mask.mask_name)
|
||||
mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
|
||||
if self.denoise_mask.masked_latents_name is not None:
|
||||
masked_latents = context.tensors.load(self.denoise_mask.masked_latents_name)
|
||||
else:
|
||||
masked_latents = torch.where(mask < 0.5, 0.0, latents)
|
||||
|
||||
return 1 - mask, masked_latents, self.denoise_mask.gradient
|
||||
|
||||
@staticmethod
|
||||
def prepare_noise_and_latents(
|
||||
context: InvocationContext, noise_field: LatentsField | None, latents_field: LatentsField | None
|
||||
) -> Tuple[int, torch.Tensor | None, torch.Tensor]:
|
||||
"""Depending on the workflow, we expect different combinations of noise and latents to be provided. This
|
||||
function handles preparing these values accordingly.
|
||||
|
||||
Expected workflows:
|
||||
- Text-to-Image Denoising: `noise` is provided, `latents` is not. `latents` is initialized to zeros.
|
||||
- Image-to-Image Denoising: `noise` and `latents` are both provided.
|
||||
- Text-to-Image SDXL Refiner Denoising: `latents` is provided, `noise` is not.
|
||||
- Image-to-Image SDXL Refiner Denoising: `latents` is provided, `noise` is not.
|
||||
|
||||
NOTE(ryand): I wrote this docstring, but I am not the original author of this code. There may be other workflows
|
||||
I haven't considered.
|
||||
"""
|
||||
noise = None
|
||||
if noise_field is not None:
|
||||
noise = context.tensors.load(noise_field.latents_name)
|
||||
|
||||
if latents_field is not None:
|
||||
latents = context.tensors.load(latents_field.latents_name)
|
||||
elif noise is not None:
|
||||
latents = torch.zeros_like(noise)
|
||||
else:
|
||||
raise ValueError("'latents' or 'noise' must be provided!")
|
||||
|
||||
if noise is not None and noise.shape[1:] != latents.shape[1:]:
|
||||
raise ValueError(f"Incompatable 'noise' and 'latents' shapes: {latents.shape=} {noise.shape=}")
|
||||
|
||||
# The seed comes from (in order of priority): the noise field, the latents field, or 0.
|
||||
seed = 0
|
||||
if noise_field is not None and noise_field.seed is not None:
|
||||
seed = noise_field.seed
|
||||
elif latents_field is not None and latents_field.seed is not None:
|
||||
seed = latents_field.seed
|
||||
else:
|
||||
seed = 0
|
||||
|
||||
return seed, noise, latents
|
||||
|
||||
    @torch.no_grad()
    @SilenceWarnings()  # This quenches the NSFW nag from diffusers.
    def invoke(self, context: InvocationContext) -> LatentsOutput:
        seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)

        mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)

        # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
        # below. Investigate whether this is appropriate.
        t2i_adapter_data = self.run_t2i_adapters(
            context,
            self.t2i_adapter,
            latents.shape,
            do_classifier_free_guidance=True,
        )

        ip_adapters: List[IPAdapterField] = []
        if self.ip_adapter is not None:
            # ip_adapter could be a list or a single IPAdapterField. Normalize to a list here.
            if isinstance(self.ip_adapter, list):
                ip_adapters = self.ip_adapter
            else:
                ip_adapters = [self.ip_adapter]

        # If there are IP adapters, the following line runs the adapters' CLIPVision image encoders to return
        # a series of image conditioning embeddings. This is being done here rather than in the
        # big model context below in order to use less VRAM on low-VRAM systems.
        # The image prompts are then passed to prep_ip_adapter_data().
        image_prompts = self.prep_ip_adapter_image_prompts(context=context, ip_adapters=ip_adapters)

        # get the unet's config so that we can pass the base to dispatch_progress()
        unet_config = context.models.get_config(self.unet.unet.key)

        def step_callback(state: PipelineIntermediateState) -> None:
            context.util.sd_step_callback(state, unet_config.base)

        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
            for lora in self.unet.loras:
                lora_info = context.models.load(lora.lora)
                assert isinstance(lora_info.model, LoRAModelRaw)
                yield (lora_info.model, lora.weight)
                del lora_info
            return

        unet_info = context.models.load(self.unet.unet)
        assert isinstance(unet_info.model, UNet2DConditionModel)
        with (
            ExitStack() as exit_stack,
            unet_info.model_on_device() as (model_state_dict, unet),
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
            set_seamless(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
            ModelPatcher.apply_lora_unet(
                unet,
                loras=_lora_loader(),
                model_state_dict=model_state_dict,
            ),
        ):
            assert isinstance(unet, UNet2DConditionModel)
            latents = latents.to(device=unet.device, dtype=unet.dtype)
            if noise is not None:
                noise = noise.to(device=unet.device, dtype=unet.dtype)
            if mask is not None:
                mask = mask.to(device=unet.device, dtype=unet.dtype)
            if masked_latents is not None:
                masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)

            scheduler = get_scheduler(
                context=context,
                scheduler_info=self.unet.scheduler,
                scheduler_name=self.scheduler,
                seed=seed,
            )

            pipeline = self.create_pipeline(unet, scheduler)

            _, _, latent_height, latent_width = latents.shape
            conditioning_data = self.get_conditioning_data(
                context=context,
                positive_conditioning_field=self.positive_conditioning,
                negative_conditioning_field=self.negative_conditioning,
                unet=unet,
                latent_height=latent_height,
                latent_width=latent_width,
                cfg_scale=self.cfg_scale,
                steps=self.steps,
                cfg_rescale_multiplier=self.cfg_rescale_multiplier,
            )

            controlnet_data = self.prep_control_data(
                context=context,
                control_input=self.control,
                latents_shape=latents.shape,
                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
                do_classifier_free_guidance=True,
                exit_stack=exit_stack,
            )

            ip_adapter_data = self.prep_ip_adapter_data(
                context=context,
                ip_adapters=ip_adapters,
                image_prompts=image_prompts,
                exit_stack=exit_stack,
                latent_height=latent_height,
                latent_width=latent_width,
                dtype=unet.dtype,
            )

            timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
                scheduler,
                device=unet.device,
                steps=self.steps,
                denoising_start=self.denoising_start,
                denoising_end=self.denoising_end,
                seed=seed,
            )

            result_latents = pipeline.latents_from_embeddings(
                latents=latents,
                timesteps=timesteps,
                init_timestep=init_timestep,
                noise=noise,
                seed=seed,
                mask=mask,
                masked_latents=masked_latents,
                is_gradient_mask=gradient_mask,
                scheduler_step_kwargs=scheduler_step_kwargs,
                conditioning_data=conditioning_data,
                control_data=controlnet_data,
                ip_adapter_data=ip_adapter_data,
                t2i_adapter_data=t2i_adapter_data,
                callback=step_callback,
            )

        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
        result_latents = result_latents.to("cpu")
        TorchDevice.empty_cache()

        name = context.tensors.save(tensor=result_latents)
        return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)

invokeai/app/invocations/ideal_size.py (new file, 65 lines)
@@ -0,0 +1,65 @@
import math
from typing import Tuple

from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
from invokeai.app.invocations.model import UNetField
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.config import BaseModelType


@invocation_output("ideal_size_output")
class IdealSizeOutput(BaseInvocationOutput):
    """Output for the Ideal Size invocation"""

    width: int = OutputField(description="The ideal width of the image (in pixels)")
    height: int = OutputField(description="The ideal height of the image (in pixels)")


@invocation(
    "ideal_size",
    title="Ideal Size",
    tags=["latents", "math", "ideal_size"],
    version="1.0.3",
)
class IdealSizeInvocation(BaseInvocation):
    """Calculates the ideal size for generation to avoid duplication"""

    width: int = InputField(default=1024, description="Final image width")
    height: int = InputField(default=576, description="Final image height")
    unet: UNetField = InputField(default=None, description=FieldDescriptions.unet)
    multiplier: float = InputField(
        default=1.0,
        description="Amount to multiply the model's dimensions by when calculating the ideal size (may result in "
        "initial generation artifacts if too large)",
    )

    def trim_to_multiple_of(self, *args: int, multiple_of: int = LATENT_SCALE_FACTOR) -> Tuple[int, ...]:
        return tuple((x - x % multiple_of) for x in args)

    def invoke(self, context: InvocationContext) -> IdealSizeOutput:
        unet_config = context.models.get_config(self.unet.unet.key)
        aspect = self.width / self.height
        dimension: float = 512
        if unet_config.base == BaseModelType.StableDiffusion2:
            dimension = 768
        elif unet_config.base == BaseModelType.StableDiffusionXL:
            dimension = 1024
        dimension = dimension * self.multiplier
        min_dimension = math.floor(dimension * 0.5)
        model_area = dimension * dimension  # hardcoded for now since all models are trained on square images

        if aspect > 1.0:
            init_height = max(min_dimension, math.sqrt(model_area / aspect))
            init_width = init_height * aspect
        else:
            init_width = max(min_dimension, math.sqrt(model_area * aspect))
            init_height = init_width / aspect

        scaled_width, scaled_height = self.trim_to_multiple_of(
            math.floor(init_width),
            math.floor(init_height),
        )

        return IdealSizeOutput(width=scaled_width, height=scaled_height)

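To make the math above concrete, here is a worked example (a standalone sketch, not part of the diff) for an SDXL model (base dimension 1024), a 1024x576 target, and multiplier=1.0. It reproduces the same arithmetic with plain floats:

# Worked example of the ideal-size calculation for SDXL and a 16:9 target.
import math

LATENT_SCALE_FACTOR = 8

width, height, dimension = 1024, 576, 1024.0
aspect = width / height                       # ~1.778 (wider than square)
min_dimension = math.floor(dimension * 0.5)   # 512
model_area = dimension * dimension            # 1024 * 1024

init_height = max(min_dimension, math.sqrt(model_area / aspect))  # 768.0
init_width = init_height * aspect                                 # ~1365.3

trim = lambda x: x - x % LATENT_SCALE_FACTOR  # round down to a multiple of 8
print(trim(math.floor(init_width)), trim(math.floor(init_height)))  # 1360 768
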
invokeai/app/invocations/image_to_latents.py (new file, 125 lines)
@@ -0,0 +1,125 @@
from functools import singledispatchmethod

import einops
import torch
from diffusers.models.attention_processor import (
    AttnProcessor2_0,
    LoRAAttnProcessor2_0,
    LoRAXFormersAttnProcessor,
    XFormersAttnProcessor,
)
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import DEFAULT_PRECISION
from invokeai.app.invocations.fields import (
    FieldDescriptions,
    ImageField,
    Input,
    InputField,
)
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager import LoadedModel
from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor


@invocation(
    "i2l",
    title="Image to Latents",
    tags=["latents", "image", "vae", "i2l"],
    category="latents",
    version="1.0.2",
)
class ImageToLatentsInvocation(BaseInvocation):
    """Encodes an image into latents."""

    image: ImageField = InputField(
        description="The image to encode",
    )
    vae: VAEField = InputField(
        description=FieldDescriptions.vae,
        input=Input.Connection,
    )
    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
    fp32: bool = InputField(default=DEFAULT_PRECISION == torch.float32, description=FieldDescriptions.fp32)

    @staticmethod
    def vae_encode(vae_info: LoadedModel, upcast: bool, tiled: bool, image_tensor: torch.Tensor) -> torch.Tensor:
        with vae_info as vae:
            assert isinstance(vae, torch.nn.Module)
            orig_dtype = vae.dtype
            if upcast:
                vae.to(dtype=torch.float32)

                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
                    vae.decoder.mid_block.attentions[0].processor,
                    (
                        AttnProcessor2_0,
                        XFormersAttnProcessor,
                        LoRAXFormersAttnProcessor,
                        LoRAAttnProcessor2_0,
                    ),
                )
                # if xformers or torch_2_0 is used attention block does not need
                # to be in float32 which can save lots of memory
                if use_torch_2_0_or_xformers:
                    vae.post_quant_conv.to(orig_dtype)
                    vae.decoder.conv_in.to(orig_dtype)
                    vae.decoder.mid_block.to(orig_dtype)
                # else:
                #     latents = latents.float()

            else:
                vae.to(dtype=torch.float16)
                # latents = latents.half()

            if tiled:
                vae.enable_tiling()
            else:
                vae.disable_tiling()

            # non_noised_latents_from_image
            image_tensor = image_tensor.to(device=vae.device, dtype=vae.dtype)
            with torch.inference_mode():
                latents = ImageToLatentsInvocation._encode_to_tensor(vae, image_tensor)

            latents = vae.config.scaling_factor * latents
            latents = latents.to(dtype=orig_dtype)

        return latents

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> LatentsOutput:
        image = context.images.get_pil(self.image.image_name)

        vae_info = context.models.load(self.vae.vae)

        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
        if image_tensor.dim() == 3:
            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")

        latents = self.vae_encode(vae_info, self.fp32, self.tiled, image_tensor)

        latents = latents.to("cpu")
        name = context.tensors.save(tensor=latents)
        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)

    @singledispatchmethod
    @staticmethod
    def _encode_to_tensor(vae: AutoencoderKL, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
        assert isinstance(vae, torch.nn.Module)
        image_tensor_dist = vae.encode(image_tensor).latent_dist
        latents: torch.Tensor = image_tensor_dist.sample().to(
            dtype=vae.dtype
        )  # FIXME: uses torch.randn. make reproducible!
        return latents

    @_encode_to_tensor.register
    @staticmethod
    def _(vae: AutoencoderTiny, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
        assert isinstance(vae, torch.nn.Module)
        latents: torch.FloatTensor = vae.encode(image_tensor).latents
        return latents

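The `_encode_to_tensor` pair above dispatches on the VAE's runtime type: an AutoencoderKL returns a latent distribution that must be sampled, while an AutoencoderTiny returns latents directly. The following standalone sketch (not part of the diff; the Dummy* classes are hypothetical stand-ins for illustration) shows the same functools single-dispatch pattern in isolation:

# Standalone sketch of the type-based dispatch used by _encode_to_tensor.
from functools import singledispatch


class DummyKLVae: ...
class DummyTinyVae: ...


@singledispatch
def encode(vae, pixels):
    raise NotImplementedError(f"No encoder registered for {type(vae).__name__}")


@encode.register
def _(vae: DummyKLVae, pixels):
    return f"sampled from latent_dist of {len(pixels)} pixels"  # stochastic path


@encode.register
def _(vae: DummyTinyVae, pixels):
    return f"direct latents from {len(pixels)} pixels"  # deterministic path


print(encode(DummyKLVae(), [0, 1]))
print(encode(DummyTinyVae(), [0, 1]))
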
File diff suppressed because it is too large
invokeai/app/invocations/latents_to_image.py (new file, 127 lines)
@@ -0,0 +1,127 @@
import torch
from diffusers.image_processor import VaeImageProcessor
from diffusers.models.attention_processor import (
    AttnProcessor2_0,
    LoRAAttnProcessor2_0,
    LoRAXFormersAttnProcessor,
    XFormersAttnProcessor,
)
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
from PIL import Image

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import DEFAULT_PRECISION
from invokeai.app.invocations.fields import (
    FieldDescriptions,
    Input,
    InputField,
    LatentsField,
    WithBoard,
    WithMetadata,
)
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.load_base import LoadedModel
from invokeai.backend.stable_diffusion import set_seamless
from invokeai.backend.util.devices import TorchDevice


@invocation(
    "l2i",
    title="Latents to Image",
    tags=["latents", "image", "vae", "l2i"],
    category="latents",
    version="1.2.2",
)
class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Generates an image from latents."""

    latents: LatentsField = InputField(
        description=FieldDescriptions.latents,
        input=Input.Connection,
    )
    vae: VAEField = InputField(
        description=FieldDescriptions.vae,
        input=Input.Connection,
    )
    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
    fp32: bool = InputField(default=DEFAULT_PRECISION == torch.float32, description=FieldDescriptions.fp32)

    @staticmethod
    def vae_decode(
        context: InvocationContext,
        vae_info: LoadedModel,
        seamless_axes: list[str],
        latents: torch.Tensor,
        use_fp32: bool,
        use_tiling: bool,
    ) -> Image.Image:
        assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
        with set_seamless(vae_info.model, seamless_axes), vae_info as vae:
            assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
            latents = latents.to(vae.device)
            if use_fp32:
                vae.to(dtype=torch.float32)

                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
                    vae.decoder.mid_block.attentions[0].processor,
                    (
                        AttnProcessor2_0,
                        XFormersAttnProcessor,
                        LoRAXFormersAttnProcessor,
                        LoRAAttnProcessor2_0,
                    ),
                )
                # if xformers or torch_2_0 is used attention block does not need
                # to be in float32 which can save lots of memory
                if use_torch_2_0_or_xformers:
                    vae.post_quant_conv.to(latents.dtype)
                    vae.decoder.conv_in.to(latents.dtype)
                    vae.decoder.mid_block.to(latents.dtype)
                else:
                    latents = latents.float()

            else:
                vae.to(dtype=torch.float16)
                latents = latents.half()

            if use_tiling or context.config.get().force_tiled_decode:
                vae.enable_tiling()
            else:
                vae.disable_tiling()

            # clear memory as vae decode can request a lot
            TorchDevice.empty_cache()

            with torch.inference_mode():
                # copied from diffusers pipeline
                latents = latents / vae.config.scaling_factor
                image = vae.decode(latents, return_dict=False)[0]
                image = (image / 2 + 0.5).clamp(0, 1)  # denormalize
                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
                np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()

            image = VaeImageProcessor.numpy_to_pil(np_image)[0]

        TorchDevice.empty_cache()

        return image

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        latents = context.tensors.load(self.latents.latents_name)
        vae_info = context.models.load(self.vae.vae)

        image = self.vae_decode(
            context=context,
            vae_info=vae_info,
            seamless_axes=self.vae.seamless_axes,
            latents=latents,
            use_fp32=self.fp32,
            use_tiling=self.tiled,
        )
        image_dto = context.images.save(image=image)

        return ImageOutput.build(image_dto)

invokeai/app/invocations/resize_latents.py (new file, 103 lines)
@@ -0,0 +1,103 @@
from typing import Literal

import torch

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.fields import (
    FieldDescriptions,
    Input,
    InputField,
    LatentsField,
)
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.util.devices import TorchDevice

LATENTS_INTERPOLATION_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"]


@invocation(
    "lresize",
    title="Resize Latents",
    tags=["latents", "resize"],
    category="latents",
    version="1.0.2",
)
class ResizeLatentsInvocation(BaseInvocation):
    """Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8."""

    latents: LatentsField = InputField(
        description=FieldDescriptions.latents,
        input=Input.Connection,
    )
    width: int = InputField(
        ge=64,
        multiple_of=LATENT_SCALE_FACTOR,
        description=FieldDescriptions.width,
    )
    height: int = InputField(
        ge=64,
        multiple_of=LATENT_SCALE_FACTOR,
        description=FieldDescriptions.height,
    )
    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)

    def invoke(self, context: InvocationContext) -> LatentsOutput:
        latents = context.tensors.load(self.latents.latents_name)
        device = TorchDevice.choose_torch_device()

        resized_latents = torch.nn.functional.interpolate(
            latents.to(device),
            size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR),
            mode=self.mode,
            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
        )

        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
        resized_latents = resized_latents.to("cpu")

        TorchDevice.empty_cache()

        name = context.tensors.save(tensor=resized_latents)
        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)


@invocation(
    "lscale",
    title="Scale Latents",
    tags=["latents", "resize"],
    category="latents",
    version="1.0.2",
)
class ScaleLatentsInvocation(BaseInvocation):
    """Scales latents by a given factor."""

    latents: LatentsField = InputField(
        description=FieldDescriptions.latents,
        input=Input.Connection,
    )
    scale_factor: float = InputField(gt=0, description=FieldDescriptions.scale_factor)
    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)

    def invoke(self, context: InvocationContext) -> LatentsOutput:
        latents = context.tensors.load(self.latents.latents_name)

        device = TorchDevice.choose_torch_device()

        # resizing
        resized_latents = torch.nn.functional.interpolate(
            latents.to(device),
            scale_factor=self.scale_factor,
            mode=self.mode,
            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
        )

        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
        resized_latents = resized_latents.to("cpu")
        TorchDevice.empty_cache()

        name = context.tensors.save(tensor=resized_latents)
        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)

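Resize Latents takes its target dimensions in pixel space and floor-divides them by LATENT_SCALE_FACTOR (8 for Stable Diffusion) before interpolating. A quick standalone illustration of that conversion (not part of the diff; the shapes are arbitrary example values):

# Pixel-space -> latent-space conversion as performed by ResizeLatentsInvocation.
import torch

LATENT_SCALE_FACTOR = 8
latents = torch.randn(1, 4, 64, 64)  # a 512x512 image in latent space

target_w, target_h = 768, 512        # requested pixel dimensions
resized = torch.nn.functional.interpolate(
    latents,
    size=(target_h // LATENT_SCALE_FACTOR, target_w // LATENT_SCALE_FACTOR),
    mode="bilinear",
)
print(resized.shape)  # torch.Size([1, 4, 64, 96])
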
invokeai/app/invocations/scheduler.py (new file, 34 lines)
@@ -0,0 +1,34 @@
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES
from invokeai.app.invocations.fields import (
    FieldDescriptions,
    InputField,
    OutputField,
    UIType,
)
from invokeai.app.services.shared.invocation_context import InvocationContext


@invocation_output("scheduler_output")
class SchedulerOutput(BaseInvocationOutput):
    scheduler: SCHEDULER_NAME_VALUES = OutputField(description=FieldDescriptions.scheduler, ui_type=UIType.Scheduler)


@invocation(
    "scheduler",
    title="Scheduler",
    tags=["scheduler"],
    category="latents",
    version="1.0.0",
)
class SchedulerInvocation(BaseInvocation):
    """Selects a scheduler."""

    scheduler: SCHEDULER_NAME_VALUES = InputField(
        default="euler",
        description=FieldDescriptions.scheduler,
        ui_type=UIType.Scheduler,
    )

    def invoke(self, context: InvocationContext) -> SchedulerOutput:
        return SchedulerOutput(scheduler=self.scheduler)

@@ -0,0 +1,268 @@
import copy
from contextlib import ExitStack
from typing import Iterator, Tuple

import torch
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from diffusers.schedulers.scheduling_utils import SchedulerMixin
from pydantic import field_validator

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
from invokeai.app.invocations.controlnet_image_processors import ControlField
from invokeai.app.invocations.denoise_latents import DenoiseLatentsInvocation, get_scheduler
from invokeai.app.invocations.fields import (
    ConditioningField,
    FieldDescriptions,
    Input,
    InputField,
    LatentsField,
    UIType,
)
from invokeai.app.invocations.model import UNetField
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_patcher import ModelPatcher
from invokeai.backend.stable_diffusion.diffusers_pipeline import ControlNetData
from invokeai.backend.stable_diffusion.multi_diffusion_pipeline import (
    MultiDiffusionPipeline,
    MultiDiffusionRegionConditioning,
)
from invokeai.backend.tiles.tiles import (
    calc_tiles_min_overlap,
)
from invokeai.backend.tiles.utils import TBLR
from invokeai.backend.util.devices import TorchDevice


def crop_controlnet_data(control_data: ControlNetData, latent_region: TBLR) -> ControlNetData:
    """Crop a ControlNetData object to a region."""
    # Create a shallow copy of the control_data object.
    control_data_copy = copy.copy(control_data)
    # The ControlNet reference image is the only attribute that needs to be cropped.
    control_data_copy.image_tensor = control_data.image_tensor[
        :,
        :,
        latent_region.top * LATENT_SCALE_FACTOR : latent_region.bottom * LATENT_SCALE_FACTOR,
        latent_region.left * LATENT_SCALE_FACTOR : latent_region.right * LATENT_SCALE_FACTOR,
    ]
    return control_data_copy


@invocation(
    "tiled_multi_diffusion_denoise_latents",
    title="Tiled Multi-Diffusion Denoise Latents",
    tags=["upscale", "denoise"],
    category="latents",
    # TODO(ryand): Reset to 1.0.0 right before release.
    version="1.0.0",
)
class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
    """Tiled Multi-Diffusion denoising.

    This node handles automatically tiling the input image. Future iterations of
    this node should allow the user to specify custom regions with different parameters for each region to harness the
    full power of Multi-Diffusion.

    This node has a similar interface to the `DenoiseLatents` node, but it has a reduced feature set (no IP-Adapter,
    T2I-Adapter, masking, etc.).
    """

    positive_conditioning: ConditioningField = InputField(
        description=FieldDescriptions.positive_cond, input=Input.Connection
    )
    negative_conditioning: ConditioningField = InputField(
        description=FieldDescriptions.negative_cond, input=Input.Connection
    )
    noise: LatentsField | None = InputField(
        default=None,
        description=FieldDescriptions.noise,
        input=Input.Connection,
    )
    latents: LatentsField | None = InputField(
        default=None,
        description=FieldDescriptions.latents,
        input=Input.Connection,
    )
    # TODO(ryand): Add multiple-of validation.
    # TODO(ryand): Smaller defaults might make more sense.
    tile_height: int = InputField(default=112, gt=0, description="Height of the tiles in latent space.")
    tile_width: int = InputField(default=112, gt=0, description="Width of the tiles in latent space.")
    tile_min_overlap: int = InputField(
        default=16,
        gt=0,
        description="The minimum overlap between adjacent tiles in latent space. The actual overlap may be larger than "
        "this to evenly cover the entire image.",
    )
    steps: int = InputField(default=18, gt=0, description=FieldDescriptions.steps)
    cfg_scale: float | list[float] = InputField(default=6.0, description=FieldDescriptions.cfg_scale, title="CFG Scale")
    # TODO(ryand): The default here should probably be 0.0.
    denoising_start: float = InputField(
        default=0.65,
        ge=0,
        le=1,
        description=FieldDescriptions.denoising_start,
    )
    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
    scheduler: SCHEDULER_NAME_VALUES = InputField(
        default="euler",
        description=FieldDescriptions.scheduler,
        ui_type=UIType.Scheduler,
    )
    unet: UNetField = InputField(
        description=FieldDescriptions.unet,
        input=Input.Connection,
        title="UNet",
    )
    cfg_rescale_multiplier: float = InputField(
        title="CFG Rescale Multiplier", default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier
    )
    control: ControlField | list[ControlField] | None = InputField(
        default=None,
        input=Input.Connection,
    )

    @field_validator("cfg_scale")
    def ge_one(cls, v: list[float] | float) -> list[float] | float:
        """Validate that all cfg_scale values are >= 1"""
        if isinstance(v, list):
            for i in v:
                if i < 1:
                    raise ValueError("cfg_scale must be greater than 1")
        else:
            if v < 1:
                raise ValueError("cfg_scale must be greater than 1")
        return v

    @staticmethod
    def create_pipeline(
        unet: UNet2DConditionModel,
        scheduler: SchedulerMixin,
    ) -> MultiDiffusionPipeline:
        # TODO(ryand): Get rid of this FakeVae hack.
        class FakeVae:
            class FakeVaeConfig:
                def __init__(self) -> None:
                    self.block_out_channels = [0]

            def __init__(self) -> None:
                self.config = FakeVae.FakeVaeConfig()

        return MultiDiffusionPipeline(
            vae=FakeVae(),  # TODO: oh...
            text_encoder=None,
            tokenizer=None,
            unet=unet,
            scheduler=scheduler,
            safety_checker=None,
            feature_extractor=None,
            requires_safety_checker=False,
        )

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> LatentsOutput:
        seed, noise, latents = DenoiseLatentsInvocation.prepare_noise_and_latents(context, self.noise, self.latents)
        _, _, latent_height, latent_width = latents.shape

        # Calculate the tile locations to cover the latent-space image.
        # TODO(ryand): Add constraints on the tile params. Is there a multiple-of constraint?
        tiles = calc_tiles_min_overlap(
            image_height=latent_height,
            image_width=latent_width,
            tile_height=self.tile_height,
            tile_width=self.tile_width,
            min_overlap=self.tile_min_overlap,
        )

        # Prepare an iterator that yields the UNet's LoRA models and their weights.
        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
            for lora in self.unet.loras:
                lora_info = context.models.load(lora.lora)
                assert isinstance(lora_info.model, LoRAModelRaw)
                yield (lora_info.model, lora.weight)
                del lora_info

        # Load the UNet model.
        unet_info = context.models.load(self.unet.unet)

        with ExitStack() as exit_stack, unet_info as unet, ModelPatcher.apply_lora_unet(unet, _lora_loader()):
            assert isinstance(unet, UNet2DConditionModel)
            latents = latents.to(device=unet.device, dtype=unet.dtype)
            if noise is not None:
                noise = noise.to(device=unet.device, dtype=unet.dtype)
            scheduler = get_scheduler(
                context=context,
                scheduler_info=self.unet.scheduler,
                scheduler_name=self.scheduler,
                seed=seed,
            )
            pipeline = self.create_pipeline(unet=unet, scheduler=scheduler)

            # Prepare the prompt conditioning data. The same prompt conditioning is applied to all tiles.
            conditioning_data = DenoiseLatentsInvocation.get_conditioning_data(
                context=context,
                positive_conditioning_field=self.positive_conditioning,
                negative_conditioning_field=self.negative_conditioning,
                unet=unet,
                latent_height=self.tile_height,
                latent_width=self.tile_width,
                cfg_scale=self.cfg_scale,
                steps=self.steps,
                cfg_rescale_multiplier=self.cfg_rescale_multiplier,
            )

            controlnet_data = DenoiseLatentsInvocation.prep_control_data(
                context=context,
                control_input=self.control,
                latents_shape=list(latents.shape),
                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
                do_classifier_free_guidance=True,
                exit_stack=exit_stack,
            )

            # Split the controlnet_data into tiles.
            # controlnet_data_tiles[t][c] is the c'th control data for the t'th tile.
            controlnet_data_tiles: list[list[ControlNetData]] = []
            for tile in tiles:
                tile_controlnet_data = [crop_controlnet_data(cn, tile.coords) for cn in controlnet_data or []]
                controlnet_data_tiles.append(tile_controlnet_data)

            # Prepare the MultiDiffusionRegionConditioning list.
            multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning] = []
            for tile, tile_controlnet_data in zip(tiles, controlnet_data_tiles, strict=True):
                multi_diffusion_conditioning.append(
                    MultiDiffusionRegionConditioning(
                        region=tile.coords,
                        text_conditioning_data=conditioning_data,
                        control_data=tile_controlnet_data,
                    )
                )

            timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
                scheduler,
                device=unet.device,
                steps=self.steps,
                denoising_start=self.denoising_start,
                denoising_end=self.denoising_end,
                seed=seed,
            )

            # Run Multi-Diffusion denoising.
            result_latents = pipeline.multi_diffusion_denoise(
                multi_diffusion_conditioning=multi_diffusion_conditioning,
                latents=latents,
                scheduler_step_kwargs=scheduler_step_kwargs,
                noise=noise,
                timesteps=timesteps,
                init_timestep=init_timestep,
                # TODO(ryand): Add proper callback.
                callback=lambda x: None,
            )

        # TODO(ryand): I copied this from DenoiseLatentsInvocation. I'm not sure if it's actually important.
        result_latents = result_latents.to("cpu")
        TorchDevice.empty_cache()

        name = context.tensors.save(tensor=result_latents)
        return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)

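The `crop_controlnet_data` helper above relies on the fixed 8x relationship between latent and pixel coordinates: a tile described in latent space is mapped to the ControlNet's pixel-space reference image by multiplying every coordinate by LATENT_SCALE_FACTOR. A standalone sketch of that scaling (not part of the diff; the TBLR dataclass and the tensor shape here are illustrative stand-ins):

# Latent->pixel coordinate scaling used when cropping ControlNet reference images.
from dataclasses import dataclass

import torch

LATENT_SCALE_FACTOR = 8


@dataclass
class TBLR:
    top: int
    bottom: int
    left: int
    right: int


control_image = torch.zeros(2, 3, 896, 896)  # (2*batch for CFG, C, H, W) in pixels
latent_region = TBLR(top=0, bottom=112, left=56, right=112)  # one 112x56 latent tile

cropped = control_image[
    :,
    :,
    latent_region.top * LATENT_SCALE_FACTOR : latent_region.bottom * LATENT_SCALE_FACTOR,
    latent_region.left * LATENT_SCALE_FACTOR : latent_region.right * LATENT_SCALE_FACTOR,
]
print(cropped.shape)  # torch.Size([2, 3, 896, 448])
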
invokeai/app/invocations/tiled_stable_diffusion_refine.py (new file, 380 lines)
@@ -0,0 +1,380 @@
from contextlib import ExitStack
from typing import Iterator, Tuple

import numpy as np
import numpy.typing as npt
import torch
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from PIL import Image
from pydantic import field_validator

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.constants import DEFAULT_PRECISION, LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
from invokeai.app.invocations.denoise_latents import DenoiseLatentsInvocation, get_scheduler
from invokeai.app.invocations.fields import (
    ConditioningField,
    FieldDescriptions,
    ImageField,
    Input,
    InputField,
    UIType,
)
from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
from invokeai.app.invocations.latents_to_image import LatentsToImageInvocation
from invokeai.app.invocations.model import ModelIdentifierField, UNetField, VAEField
from invokeai.app.invocations.noise import get_noise
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, prepare_control_image
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_patcher import ModelPatcher
from invokeai.backend.stable_diffusion.diffusers_pipeline import ControlNetData, image_resized_to_grid_as_tensor
from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending
from invokeai.backend.tiles.utils import Tile
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.hotfixes import ControlNetModel


@invocation(
    "tiled_stable_diffusion_refine",
    title="Tiled Stable Diffusion Refine",
    tags=["upscale", "denoise"],
    category="latents",
    version="1.0.0",
)
class TiledStableDiffusionRefineInvocation(BaseInvocation):
    """A tiled Stable Diffusion pipeline for refining high resolution images. This invocation is intended to be used
    to refine an image after upscaling, i.e. it is the second step in a typical "tiled upscaling" workflow.
    """

    image: ImageField = InputField(description="Image to be refined.")

    positive_conditioning: ConditioningField = InputField(
        description=FieldDescriptions.positive_cond, input=Input.Connection
    )
    negative_conditioning: ConditioningField = InputField(
        description=FieldDescriptions.negative_cond, input=Input.Connection
    )
    # TODO(ryand): Add multiple-of validation.
    tile_height: int = InputField(default=512, gt=0, description="Height of the tiles.")
    tile_width: int = InputField(default=512, gt=0, description="Width of the tiles.")
    tile_overlap: int = InputField(
        default=16,
        gt=0,
        description="Target overlap between adjacent tiles (the last row/column may overlap more than this).",
    )
    steps: int = InputField(default=18, gt=0, description=FieldDescriptions.steps)
    cfg_scale: float | list[float] = InputField(default=6.0, description=FieldDescriptions.cfg_scale, title="CFG Scale")
    denoising_start: float = InputField(
        default=0.65,
        ge=0,
        le=1,
        description=FieldDescriptions.denoising_start,
    )
    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
    scheduler: SCHEDULER_NAME_VALUES = InputField(
        default="euler",
        description=FieldDescriptions.scheduler,
        ui_type=UIType.Scheduler,
    )
    unet: UNetField = InputField(
        description=FieldDescriptions.unet,
        input=Input.Connection,
        title="UNet",
    )
    cfg_rescale_multiplier: float = InputField(
        title="CFG Rescale Multiplier", default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier
    )
    vae: VAEField = InputField(
        description=FieldDescriptions.vae,
        input=Input.Connection,
    )
    vae_fp32: bool = InputField(
        default=DEFAULT_PRECISION == torch.float32, description="Whether to use float32 precision when running the VAE."
    )
    # HACK(ryand): We probably want to allow the user to control all of the parameters in ControlField. But, we
    # awkwardly don't want to use the image field. Figure out how best to handle this.
    # TODO(ryand): Currently, there is no ControlNet preprocessor applied to the tile images. In other words, we pretty
    # much assume that it is a tile ControlNet. We need to decide how we want to handle this. E.g. find a way to support
    # CN preprocessors, raise a clear warning when a non-tile CN model is selected, hardcode the supported CN models,
    # etc.
    control_model: ModelIdentifierField = InputField(
        description=FieldDescriptions.controlnet_model, ui_type=UIType.ControlNetModel
    )
    control_weight: float = InputField(default=0.6)

    @field_validator("cfg_scale")
    def ge_one(cls, v: list[float] | float) -> list[float] | float:
        """Validate that all cfg_scale values are >= 1"""
        if isinstance(v, list):
            for i in v:
                if i < 1:
                    raise ValueError("cfg_scale must be greater than 1")
        else:
            if v < 1:
                raise ValueError("cfg_scale must be greater than 1")
        return v

    @staticmethod
    def crop_latents_to_tile(latents: torch.Tensor, image_tile: Tile) -> torch.Tensor:
        """Crop the latent-space tensor to the area corresponding to the image-space tile.

        The tile coordinates must be divisible by the LATENT_SCALE_FACTOR.
        """
        for coord in [image_tile.coords.top, image_tile.coords.left, image_tile.coords.right, image_tile.coords.bottom]:
            if coord % LATENT_SCALE_FACTOR != 0:
                raise ValueError(
                    f"The tile coordinates must all be divisible by the latent scale factor"
                    f" ({LATENT_SCALE_FACTOR}). {image_tile.coords=}."
                )
        assert latents.dim() == 4  # We expect: (batch_size, channels, height, width).

        top = image_tile.coords.top // LATENT_SCALE_FACTOR
        left = image_tile.coords.left // LATENT_SCALE_FACTOR
        bottom = image_tile.coords.bottom // LATENT_SCALE_FACTOR
        right = image_tile.coords.right // LATENT_SCALE_FACTOR
        return latents[..., top:bottom, left:right]

    def run_controlnet(
        self,
        image: Image.Image,
        controlnet_model: ControlNetModel,
        weight: float,
        do_classifier_free_guidance: bool,
        width: int,
        height: int,
        device: torch.device,
        dtype: torch.dtype,
        control_mode: CONTROLNET_MODE_VALUES = "balanced",
        resize_mode: CONTROLNET_RESIZE_VALUES = "just_resize_simple",
    ) -> ControlNetData:
        control_image = prepare_control_image(
            image=image,
            do_classifier_free_guidance=do_classifier_free_guidance,
            width=width,
            height=height,
            device=device,
            dtype=dtype,
            control_mode=control_mode,
            resize_mode=resize_mode,
        )
        return ControlNetData(
            model=controlnet_model,
            image_tensor=control_image,
            weight=weight,
            begin_step_percent=0.0,
            end_step_percent=1.0,
            control_mode=control_mode,
            # Any resizing needed should currently be happening in prepare_control_image(), but adding resize_mode to
            # ControlNetData in case needed in the future.
            resize_mode=resize_mode,
        )

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        # TODO(ryand): Expose the seed parameter.
        seed = 0

        # Load the input image.
        input_image = context.images.get_pil(self.image.image_name)

        # Calculate the tile locations to cover the image.
        # We have selected this tiling strategy to make it easy to achieve tile coords that are multiples of 8. This
        # facilitates conversions between image space and latent space.
        # TODO(ryand): Expose these tiling parameters. (Keep in mind the multiple-of constraints on these params.)
        tiles = calc_tiles_with_overlap(
            image_height=input_image.height,
            image_width=input_image.width,
            tile_height=self.tile_height,
            tile_width=self.tile_width,
            overlap=self.tile_overlap,
        )

        # Convert the input image to a torch.Tensor.
        input_image_torch = image_resized_to_grid_as_tensor(input_image.convert("RGB"), multiple_of=LATENT_SCALE_FACTOR)
        input_image_torch = input_image_torch.unsqueeze(0)  # Add a batch dimension.
        # Validate our assumptions about the shape of input_image_torch.
        assert input_image_torch.dim() == 4  # We expect: (batch_size, channels, height, width).
        assert input_image_torch.shape[:2] == (1, 3)

        # Split the input image into tiles in torch.Tensor format.
        image_tiles_torch: list[torch.Tensor] = []
        for tile in tiles:
            image_tile = input_image_torch[
                :,
                :,
                tile.coords.top : tile.coords.bottom,
                tile.coords.left : tile.coords.right,
            ]
            image_tiles_torch.append(image_tile)

        # Split the input image into tiles in numpy format.
        # TODO(ryand): We currently maintain both np.ndarray and torch.Tensor tiles. Ideally, all operations should work
        # with torch.Tensor tiles.
        input_image_np = np.array(input_image)
        image_tiles_np: list[npt.NDArray[np.uint8]] = []
        for tile in tiles:
            image_tile_np = input_image_np[
                tile.coords.top : tile.coords.bottom,
                tile.coords.left : tile.coords.right,
                :,
            ]
            image_tiles_np.append(image_tile_np)

        # VAE-encode each image tile independently.
        # TODO(ryand): Is there any advantage to VAE-encoding the entire image before splitting it into tiles? What
        # about for decoding?
        vae_info = context.models.load(self.vae.vae)
        latent_tiles: list[torch.Tensor] = []
        for image_tile_torch in image_tiles_torch:
            latent_tiles.append(
                ImageToLatentsInvocation.vae_encode(
                    vae_info=vae_info, upcast=self.vae_fp32, tiled=False, image_tensor=image_tile_torch
                )
            )

        # Generate noise with dimensions corresponding to the full image in latent space.
        # It is important that the noise tensor is generated at the full image dimension and then tiled, rather than
        # generating for each tile independently. This ensures that overlapping regions between tiles use the same
        # noise.
        assert input_image_torch.shape[2] % LATENT_SCALE_FACTOR == 0
        assert input_image_torch.shape[3] % LATENT_SCALE_FACTOR == 0
        global_noise = get_noise(
            width=input_image_torch.shape[3],
            height=input_image_torch.shape[2],
            device=TorchDevice.choose_torch_device(),
            seed=seed,
            downsampling_factor=LATENT_SCALE_FACTOR,
            use_cpu=True,
        )

        # Crop the global noise into tiles.
        noise_tiles = [self.crop_latents_to_tile(latents=global_noise, image_tile=t) for t in tiles]

        # Prepare an iterator that yields the UNet's LoRA models and their weights.
        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
            for lora in self.unet.loras:
                lora_info = context.models.load(lora.lora)
                assert isinstance(lora_info.model, LoRAModelRaw)
                yield (lora_info.model, lora.weight)
                del lora_info

        # Load the UNet model.
        unet_info = context.models.load(self.unet.unet)

        refined_latent_tiles: list[torch.Tensor] = []
        with ExitStack() as exit_stack, unet_info as unet, ModelPatcher.apply_lora_unet(unet, _lora_loader()):
            assert isinstance(unet, UNet2DConditionModel)
            scheduler = get_scheduler(
                context=context,
                scheduler_info=self.unet.scheduler,
                scheduler_name=self.scheduler,
                seed=seed,
            )
            pipeline = DenoiseLatentsInvocation.create_pipeline(unet=unet, scheduler=scheduler)

            # Prepare the prompt conditioning data. The same prompt conditioning is applied to all tiles.
            # Assume that all tiles have the same shape.
            _, _, latent_height, latent_width = latent_tiles[0].shape
            conditioning_data = DenoiseLatentsInvocation.get_conditioning_data(
                context=context,
                positive_conditioning_field=self.positive_conditioning,
                negative_conditioning_field=self.negative_conditioning,
                unet=unet,
                latent_height=latent_height,
                latent_width=latent_width,
                cfg_scale=self.cfg_scale,
                steps=self.steps,
                cfg_rescale_multiplier=self.cfg_rescale_multiplier,
            )

            # Load the ControlNet model.
            # TODO(ryand): Support multiple ControlNet models.
            controlnet_model = exit_stack.enter_context(context.models.load(self.control_model))
            assert isinstance(controlnet_model, ControlNetModel)

            # Denoise (i.e. "refine") each tile independently.
            for image_tile_np, latent_tile, noise_tile in zip(image_tiles_np, latent_tiles, noise_tiles, strict=True):
                assert latent_tile.shape == noise_tile.shape

                # Prepare a PIL Image for ControlNet processing.
                # TODO(ryand): This is a bit awkward that we have to prepare both torch.Tensor and PIL.Image versions of
                # the tiles. Ideally, the ControlNet code should be able to work with Tensors.
                image_tile_pil = Image.fromarray(image_tile_np)

                # Run the ControlNet on the image tile.
                height, width, _ = image_tile_np.shape
                # The height and width must be evenly divisible by LATENT_SCALE_FACTOR. This is enforced earlier, but we
                # validate this assumption here.
                assert height % LATENT_SCALE_FACTOR == 0
                assert width % LATENT_SCALE_FACTOR == 0
                controlnet_data = self.run_controlnet(
                    image=image_tile_pil,
                    controlnet_model=controlnet_model,
                    weight=self.control_weight,
                    do_classifier_free_guidance=True,
                    width=width,
                    height=height,
                    device=controlnet_model.device,
                    dtype=controlnet_model.dtype,
                    control_mode="balanced",
                    resize_mode="just_resize_simple",
                )

                timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
                    scheduler,
                    device=unet.device,
                    steps=self.steps,
                    denoising_start=self.denoising_start,
                    denoising_end=self.denoising_end,
                    seed=seed,
                )

                # TODO(ryand): Think about when/if latents/noise should be moved off of the device to save VRAM.
                latent_tile = latent_tile.to(device=unet.device, dtype=unet.dtype)
                noise_tile = noise_tile.to(device=unet.device, dtype=unet.dtype)
                refined_latent_tile = pipeline.latents_from_embeddings(
                    latents=latent_tile,
                    timesteps=timesteps,
                    init_timestep=init_timestep,
                    noise=noise_tile,
                    seed=seed,
                    mask=None,
                    masked_latents=None,
                    scheduler_step_kwargs=scheduler_step_kwargs,
                    conditioning_data=conditioning_data,
                    control_data=[controlnet_data],
                    ip_adapter_data=None,
                    t2i_adapter_data=None,
                    callback=lambda x: None,
                )
                refined_latent_tiles.append(refined_latent_tile)

        # VAE-decode each refined latent tile independently.
        refined_image_tiles: list[Image.Image] = []
        for refined_latent_tile in refined_latent_tiles:
            refined_image_tile = LatentsToImageInvocation.vae_decode(
                context=context,
                vae_info=vae_info,
                seamless_axes=self.vae.seamless_axes,
                latents=refined_latent_tile,
                use_fp32=self.vae_fp32,
                use_tiling=False,
            )
            refined_image_tiles.append(refined_image_tile)

        # TODO(ryand): I copied this from DenoiseLatentsInvocation. I'm not sure if it's actually important.
        TorchDevice.empty_cache()

        # Merge the refined image tiles back into a single image.
        refined_image_tiles_np = [np.array(t) for t in refined_image_tiles]
        merged_image_np = np.zeros(shape=(input_image.height, input_image.width, 3), dtype=np.uint8)
        # TODO(ryand): Tune the blend_amount. Should this be exposed as a parameter?
        merge_tiles_with_linear_blending(
            dst_image=merged_image_np, tiles=tiles, tile_images=refined_image_tiles_np, blend_amount=self.tile_overlap
        )

        # Save the refined image and return its reference.
        merged_image_pil = Image.fromarray(merged_image_np)
        image_dto = context.images.save(image=merged_image_pil)

        return ImageOutput.build(image_dto)

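A note on the "generate noise once, then crop per tile" rule stressed in the comments above: because every tile is cut from the same global noise tensor, two overlapping tiles agree exactly on their shared region, which keeps seams consistent when the refined tiles are blended back together. A small standalone demonstration (not part of the diff; shapes are arbitrary):

# Overlapping crops of one global noise tensor are identical on their overlap.
import torch

torch.manual_seed(0)
global_noise = torch.randn(1, 4, 128, 128)

tile_a = global_noise[..., :, 0:80]    # columns 0..79
tile_b = global_noise[..., :, 64:128]  # columns 64..127 (overlaps columns 64..79)

assert torch.equal(tile_a[..., :, 64:80], tile_b[..., :, 0:16])
print("overlap identical:", True)
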
@@ -8,7 +8,7 @@ import time
import traceback
from pathlib import Path
from queue import Empty, PriorityQueue
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set
from typing import Any, Dict, List, Literal, Optional, Set

import requests
from pydantic.networks import AnyHttpUrl
@@ -34,9 +34,6 @@ from .download_base import (
    UnknownJobIDException,
)

if TYPE_CHECKING:
    from invokeai.app.services.events.events_base import EventServiceBase

# Maximum number of bytes to download during each call to requests.iter_content()
DOWNLOAD_CHUNK_SIZE = 100000

@@ -22,6 +22,7 @@ from invokeai.app.services.events.events_common import (
    ModelInstallCompleteEvent,
    ModelInstallDownloadProgressEvent,
    ModelInstallDownloadsCompleteEvent,
    ModelInstallDownloadStartedEvent,
    ModelInstallErrorEvent,
    ModelInstallStartedEvent,
    ModelLoadCompleteEvent,
@@ -34,7 +35,6 @@ from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
if TYPE_CHECKING:
    from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput
    from invokeai.app.services.download.download_base import DownloadJob
    from invokeai.app.services.events.events_common import EventBase
    from invokeai.app.services.model_install.model_install_common import ModelInstallJob
    from invokeai.app.services.session_processor.session_processor_common import ProgressImage
    from invokeai.app.services.session_queue.session_queue_common import (
@@ -145,6 +145,10 @@ class EventServiceBase:

    # region Model install

    def emit_model_install_download_started(self, job: "ModelInstallJob") -> None:
        """Emitted when an install job starts downloading (remote models only)."""
        self.dispatch(ModelInstallDownloadStartedEvent.build(job))

    def emit_model_install_download_progress(self, job: "ModelInstallJob") -> None:
        """Emitted at intervals while the install job is in progress (remote models only)."""
        self.dispatch(ModelInstallDownloadProgressEvent.build(job))

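For readers following the event flow, the change above means a remote install now emits a one-off "download started" event before the recurring "download progress" events. A hypothetical subscriber sketch (this is not the InvokeAI event API; the dict payloads and registry below are stand-ins for the pydantic events built later in this diff):

# Hypothetical subscriber showing the intended event sequence for a remote install.
from collections import defaultdict
from typing import Any, Callable

_handlers: dict[str, list[Callable[[dict[str, Any]], None]]] = defaultdict(list)


def on(event_name: str, handler: Callable[[dict[str, Any]], None]) -> None:
    _handlers[event_name].append(handler)


def dispatch(event_name: str, payload: dict[str, Any]) -> None:
    for handler in _handlers[event_name]:
        handler(payload)


on("model_install_download_started", lambda p: print("download started:", p["source"]))
on("model_install_download_progress", lambda p: print("progress:", p["bytes"], "/", p["total_bytes"]))

dispatch("model_install_download_started", {"source": "some/repo_id", "bytes": 0, "total_bytes": 1000})
dispatch("model_install_download_progress", {"source": "some/repo_id", "bytes": 500, "total_bytes": 1000})
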
@@ -417,6 +417,42 @@ class ModelLoadCompleteEvent(ModelEventBase):
        return cls(config=config, submodel_type=submodel_type)


@payload_schema.register
class ModelInstallDownloadStartedEvent(ModelEventBase):
    """Event model for model_install_download_started"""

    __event_name__ = "model_install_download_started"

    id: int = Field(description="The ID of the install job")
    source: str = Field(description="Source of the model; local path, repo_id or url")
    local_path: str = Field(description="Where model is downloading to")
    bytes: int = Field(description="Number of bytes downloaded so far")
    total_bytes: int = Field(description="Total size of download, including all files")
    parts: list[dict[str, int | str]] = Field(
        description="Progress of downloading URLs that comprise the model, if any"
    )

    @classmethod
    def build(cls, job: "ModelInstallJob") -> "ModelInstallDownloadStartedEvent":
        parts: list[dict[str, str | int]] = [
            {
                "url": str(x.source),
                "local_path": str(x.download_path),
                "bytes": x.bytes,
                "total_bytes": x.total_bytes,
            }
            for x in job.download_parts
        ]
        return cls(
            id=job.id,
            source=str(job.source),
            local_path=job.local_path.as_posix(),
            parts=parts,
            bytes=job.bytes,
            total_bytes=job.total_bytes,
        )


@payload_schema.register
class ModelInstallDownloadProgressEvent(ModelEventBase):
    """Event model for model_install_download_progress"""

@@ -9,7 +9,7 @@ from pathlib import Path
from queue import Empty, Queue
from shutil import copyfile, copytree, move, rmtree
from tempfile import mkdtemp
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
from typing import Any, Dict, List, Optional, Tuple, Type, Union

import torch
import yaml
@@ -60,9 +60,6 @@ from .model_install_common import (

TMPDIR_PREFIX = "tmpinstall_"

if TYPE_CHECKING:
    from invokeai.app.services.events.events_base import EventServiceBase


class ModelInstallService(ModelInstallServiceBase):
    """class for InvokeAI model installation."""
@@ -412,11 +409,14 @@ class ModelInstallService(ModelInstallServiceBase):
        if isinstance(source, HFModelSource):
            metadata = HuggingFaceMetadataFetch(self._session).from_id(source.repo_id, source.variant)
            assert isinstance(metadata, ModelMetadataWithFiles)
            return metadata.download_urls(
                variant=source.variant or self._guess_variant(),
                subfolder=source.subfolder,
                session=self._session,
            ), metadata
            return (
                metadata.download_urls(
                    variant=source.variant or self._guess_variant(),
                    subfolder=source.subfolder,
                    session=self._session,
                ),
                metadata,
            )

        if isinstance(source, URLModelSource):
            try:
@@ -822,7 +822,7 @@ class ModelInstallService(ModelInstallServiceBase):
            install_job.download_parts = download_job.download_parts
            install_job.bytes = sum(x.bytes for x in download_job.download_parts)
            install_job.total_bytes = download_job.total_bytes
            self._signal_job_downloading(install_job)
            self._signal_job_download_started(install_job)

    def _download_progress_callback(self, download_job: MultiFileDownloadJob) -> None:
        with self._lock:
@@ -874,6 +874,13 @@ class ModelInstallService(ModelInstallServiceBase):
        if self._event_bus:
            self._event_bus.emit_model_install_started(job)

    def _signal_job_download_started(self, job: ModelInstallJob) -> None:
        if self._event_bus:
            assert job._multifile_job is not None
            assert job.bytes is not None
            assert job.total_bytes is not None
            self._event_bus.emit_model_install_download_started(job)

    def _signal_job_downloading(self, job: ModelInstallJob) -> None:
        if self._event_bus:
            assert job._multifile_job is not None

@@ -289,7 +289,7 @@ def prepare_control_image(
    width: int,
    height: int,
    num_channels: int = 3,
    device: str = "cuda",
    device: str | torch.device = "cuda",
    dtype: torch.dtype = torch.float16,
    control_mode: CONTROLNET_MODE_VALUES = "balanced",
    resize_mode: CONTROLNET_RESIZE_VALUES = "just_resize_simple",
@@ -304,7 +304,7 @@ def prepare_control_image(
        num_channels (int, optional): The target number of image channels. This is achieved by converting the input
            image to RGB, then naively taking the first `num_channels` channels. The primary use case is converting a
            RGB image to a single-channel grayscale image. Raises if `num_channels` cannot be achieved. Defaults to 3.
        device (str, optional): The target device for the output image. Defaults to "cuda".
        device (str | torch.Device, optional): The target device for the output image. Defaults to "cuda".
        dtype (_type_, optional): The dtype for the output image. Defaults to torch.float16.
        do_classifier_free_guidance (bool, optional): If True, repeat the output image along the batch dimension.
            Defaults to True.
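Widening the parameter to `str | torch.device` is cheap to handle on the callee side because `torch.device()` accepts both forms. A minimal standalone sketch of that normalization (the helper name is illustrative, not from the diff):

import torch

def normalize_device(device: str | torch.device) -> torch.device:
    # torch.device() accepts either a string ("cpu", "cuda", "cuda:1") or an
    # existing torch.device, so both spellings of the argument are supported.
    return torch.device(device)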
@@ -125,13 +125,16 @@ class IPAdapter(RawModel):
            self.device, dtype=self.dtype
        )

    def to(self, device: torch.device, dtype: Optional[torch.dtype] = None):
        self.device = device
    def to(
        self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, non_blocking: bool = False
    ):
        if device is not None:
            self.device = device
        if dtype is not None:
            self.dtype = dtype

        self._image_proj_model.to(device=self.device, dtype=self.dtype)
        self.attn_weights.to(device=self.device, dtype=self.dtype)
        self._image_proj_model.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
        self.attn_weights.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)

    def calc_size(self):
        # workaround for circular import
@@ -61,9 +61,10 @@ class LoRALayerBase:
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        if self.bias is not None:
            self.bias = self.bias.to(device=device, dtype=dtype)
            self.bias = self.bias.to(device=device, dtype=dtype, non_blocking=non_blocking)

    # TODO: find and debug lora/locon with bias
@@ -109,14 +110,15 @@ class LoRALayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)
        super().to(device=device, dtype=dtype, non_blocking=non_blocking)

        self.up = self.up.to(device=device, dtype=dtype)
        self.down = self.down.to(device=device, dtype=dtype)
        self.up = self.up.to(device=device, dtype=dtype, non_blocking=non_blocking)
        self.down = self.down.to(device=device, dtype=dtype, non_blocking=non_blocking)

        if self.mid is not None:
            self.mid = self.mid.to(device=device, dtype=dtype)
            self.mid = self.mid.to(device=device, dtype=dtype, non_blocking=non_blocking)


class LoHALayer(LoRALayerBase):
@@ -169,18 +171,19 @@ class LoHALayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)

        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
        self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
        if self.t1 is not None:
            self.t1 = self.t1.to(device=device, dtype=dtype)
            self.t1 = self.t1.to(device=device, dtype=dtype, non_blocking=non_blocking)

        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
        self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)


class LoKRLayer(LoRALayerBase):
@@ -265,6 +268,7 @@ class LoKRLayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)

@@ -273,19 +277,19 @@ class LoKRLayer(LoRALayerBase):
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
            self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
            self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)

        if self.w2 is not None:
            self.w2 = self.w2.to(device=device, dtype=dtype)
            self.w2 = self.w2.to(device=device, dtype=dtype, non_blocking=non_blocking)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
            self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
            self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)

        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)


class FullLayer(LoRALayerBase):
@@ -319,10 +323,11 @@ class FullLayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)
        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)


class IA3Layer(LoRALayerBase):
@@ -358,11 +363,12 @@ class IA3Layer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ):
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)
        self.on_input = self.on_input.to(device=device, dtype=dtype)
        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
        self.on_input = self.on_input.to(device=device, dtype=dtype, non_blocking=non_blocking)


AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
@@ -388,10 +394,11 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        # TODO: try revert if exception?
        for _key, layer in self.layers.items():
            layer.to(device=device, dtype=dtype)
            layer.to(device=device, dtype=dtype, non_blocking=non_blocking)

    def calc_size(self) -> int:
        model_size = 0
@@ -514,7 +521,7 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
                # lower memory consumption by removing already parsed layer values
                state_dict[layer_key].clear()

                layer.to(device=device, dtype=dtype)
                layer.to(device=device, dtype=dtype, non_blocking=True)
                model.layers[layer_key] = layer

        return model
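The `non_blocking` plumbing added to these `to()` methods only overlaps copies with GPU work when the source tensor sits in pinned (page-locked) host memory, and CPU-side reads must wait for the copy. A minimal, self-contained sketch of that pattern (independent of the LoRA classes above, and assuming a CUDA build is available):

import torch

def async_copy_to_cuda(t: torch.Tensor) -> torch.Tensor:
    # Pinned host memory lets the host-to-device copy overlap with GPU compute.
    pinned = t if t.is_pinned() else t.pin_memory()
    moved = pinned.to("cuda", non_blocking=True)
    # The copy is queued on the current CUDA stream; kernels on the same stream
    # are ordered after it, but synchronize before any CPU-side use of `moved`.
    torch.cuda.current_stream().synchronize()
    return moved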
invokeai/backend/model_hash/hash_validator.py (new file, 24 lines)
@@ -0,0 +1,24 @@
import json
from base64 import b64decode


def validate_hash(hash: str):
    if ":" not in hash:
        return
    for enc_hash in hashes:
        alg, hash_ = hash.split(":")
        if alg == "blake3":
            alg = "blake3_single"
        map = json.loads(b64decode(enc_hash))
        if alg in map:
            if hash_ == map[alg]:
                raise Exception("Unrecoverable Model Error")


hashes: list[str] = [
"eyJibGFrZTNfbXVsdGkiOiI3Yjc5ODZmM2QyNTk3MDZiMjVhZDRhM2NmNGM2MTcyNGNhZmQ0Yjc4NjI4MjIwNjMyZGU4NjVlM2UxNDEyMTVlIiwiYmxha2UzX3NpbmdsZSI6IjdiNzk4NmYzZDI1OTcwNmIyNWFkNGEzY2Y0YzYxNzI0Y2FmZDRiNzg2MjgyMjA2MzJkZTg2NWUzZTE0MTIxNWUiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiNzdlZmU5MzRhZGQ3YmU5Njc3NmJkODM3NWJhZDQxN2QiLCJzaGExIjoiYmM2YzYxYzgwNDgyMTE2ZTY2ZGQyNTYwNjRkYTgxYjFlY2U4NzMzOCIsInNoYTIyNCI6IjgzNzNlZGM4ZTg4Y2UxMTljODdlOTM2OTY4ZWViMWNmMzdjZGY4NTBmZjhjOTZkYjNmMDc4YmE0Iiwic2hhMjU2IjoiNzNjYWMxZWRlZmUyZjdlODFkNjRiMTI2YjIxMmY2Yzk2ZTAwNjgyNGJjZmJkZDI3Y2E5NmUyNTk5ZTQwNzUwZiIsInNoYTM4NCI6IjlmNmUwNzlmOTNiNDlkMTg1YzEyNzY0OGQwNzE3YTA0N2E3MzYyNDI4YzY4MzBhNDViNzExODAwZDE4NjIwZDZjMjcwZGE3ZmY0Y2FjOTRmNGVmZDdiZWQ5OTlkOWU0ZCIsInNoYTUxMiI6IjAwNzE5MGUyYjk5ZjVlN2Q1OGZiYWI2YTk1YmY0NjJiODhkOTg1N2NlNjY4MTMyMGJmM2M0Y2ZiZmY0MjkxZmEzNTMyMTk3YzdkODc2YWQ3NjZhOTQyOTQ2Zjc1OWY2YTViNDBlM2I2MzM3YzIwNWI0M2JkOWMyN2JiMTljNzk0IiwiYmxha2UyYiI6IjlhN2VhNTQzY2ZhMmMzMWYyZDIyNjg2MjUwNzUyNDE0Mjc1OWJiZTA0MWZlMWJkMzQzNDM1MWQwNWZlYjI2OGY2MjU0OTFlMzlmMzdkYWQ4MGM2Y2UzYTE4ZjAxNGEzZjJiMmQ2OGU2OTc0MjRmNTU2M2Y5ZjlhYzc1MzJiMjEwIiwiYmxha2UycyI6ImYxZmMwMjA0YjdjNzIwNGJlNWI1YzY3NDEyYjQ2MjY5NWE3YjFlYWQ2M2E5ZGVkMjEzYjZmYTU0NGZjNjJlYzUiLCJzaGEzXzIyNCI6IjljZDQ3YTBhMzA3NmNmYzI0NjJhNTAzMjVmMjg4ZjFiYzJjMmY2NmU2ODIxODc5NjJhNzU0NjFmIiwic2hhM18yNTYiOiI4NTFlNGI1ZDI1MWZlZTFiYzk0ODU1OWNjMDNiNjhlNTllYWU5YWI1ZTUyYjA0OTgxYTRhOTU4YWQyMDdkYjYwIiwic2hhM18zODQiOiJiZDA2ZTRhZGFlMWQ0MTJmZjFjOTcxMDJkZDFlN2JmY2UzMDViYTgxMTgyNzM3NWY5NTI4OWJkOGIyYTUxNjdiMmUyNzZjODNjNTU3ODFhMTEyMDRhNzc5MTUwMzM5ZTEiLCJzaGEzXzUxMiI6ImQ1ZGQ2OGZmZmY5NGRhZjJhMDkzZTliNmM1MTBlZmZkNThmZTA0ODMyZGQzMzEyOTZmN2NkZmYzNmRhZmQ3NGMxY2VmNjUxNTBkZjk5OGM1ODgyY2MzMzk2MTk1ZTViYjc5OTY1OGFkMTQ3MzFiMjJmZWZiMWQzNmY2MWJjYzJjIiwic2hha2VfMTI4IjoiOWJlNTgwNWMwNjg1MmZmNDUzNGQ4ZDZmODYyMmFkOTJkMGUwMWE2Y2JmYjIwN2QxOTRmM2JkYThiOGNmNWU4ZiIsInNoYWtlXzI1NiI6IjRhYjgwYjY2MzcxYzdhNjBhYWM4NDVkMTZlNWMzZDNhMmM4M2FjM2FjZDNiNTBiNzdjYWYyYTNmMWMyY2ZjZjc5OGNjYjkxN2FjZjQzNzBmZDdjN2ZmODQ5M2Q3NGY1MWM4NGU3M2ViZGQ4MTRmM2MwMzk3YzI4ODlmNTI0Mzg3In0K",
"eyJibGFrZTNfbXVsdGkiOiI4ODlmYzIwMDA4NWY1NWY4YTA4MjhiODg3MDM0OTRhMGFmNWZkZGI5N2E2YmYwMDRjM2VkYTdiYzBkNDU0MjQzIiwiYmxha2UzX3NpbmdsZSI6Ijg4OWZjMjAwMDg1ZjU1ZjhhMDgyOGI4ODcwMzQ5NGEwYWY1ZmRkYjk3YTZiZjAwNGMzZWRhN2JjMGQ0NTQyNDMiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiNTIzNTRhMzkzYTVmOGNjNmMyMzQ0OThiYjcxMDljYzEiLCJzaGExIjoiMTJmYmRhOGE3ZGUwOGMwNDc2NTA5OWY2NGNmMGIzYjcxMjc1MGM1NyIsInNoYTIyNCI6IjEyZWU3N2U0Y2NhODViMDk4YjdjNWJlMWFjNGMwNzljNGM3MmJmODA2YjdlZjU1NGI0NzgxZDkxIiwic2hhMjU2IjoiMjU1NTMwZDAyYTY4MjY4OWE5ZTZjMjRhOWZhMDM2OGNhODMxZTI1OTAyYjM2NzQyNzkwZTk3NzU1ZjEzMmNmNSIsInNoYTM4NCI6IjhkMGEyMTRlNDk0NGE2NGY3ZmZjNTg3MGY0ZWUyZTA0OGIzYjRjMmQ0MGRmMWFmYTVlOGE1ZWNkN2IwOTY3M2ZjNWI5YzM5Yzg4Yjc2YmIwY2I4ZjQ1ZjAxY2MwNjZkNCIsInNoYTUxMiI6Ijg3NTM3OWNiYzdlOGYyNzU4YjVjMDY5ZTU2ZWRjODY1ODE4MGFkNDEzNGMwMzY1NzM4ZjM1YjQwYzI2M2JkMTMwMzcwZTE0MzZkNDNmOGFhMTgyMTg5MzgzMTg1ODNhOWJhYTUyYTBjMTk1Mjg5OTQzYzZiYTY2NTg1Yjg5M2ZiIiwiYmxha2UyYiI6IjBhY2MwNWEwOGE5YjhhODNmZTVjYTk4ZmExMTg3NTYwNjk0MjY0YWUxNTI4NDliYzFkNzQzNTYzMzMyMTlhYTg3N2ZiNjc4MmRjZDZiOGIyYjM1MTkyNDQzNDE2ODJiMTQ3YmY2YTY3MDU2ZWIwOTQ4MzE1M2E4Y2ZiNTNmMTI0IiwiYmxha2UycyI6ImY5ZTRhZGRlNGEzZDRhOTZhOWUyNjVjMGVmMjdmZDNiNjA0NzI1NDllMTEyMWQzOGQwMTkxNTY5ZDY5YzdhYzAiLCJzaGEzXzIyNCI6ImM0NjQ3MGRjMjkyNGI0YjZkMTA2NDY5MDRiNWM2OGVjNTU2YmQ4MTA5NmVkMTA4YjZiMzQyZmU1Iiwic2hhM18yNTYiOiIwMDBlMThiZTI1MzYxYTk0NGExZTIwNjQ5ZmY0ZGM2OGRiZTk0OGNkNTYwY2I5MTFhODU1OTE3ODdkNWQ5YWYwIiwic2hhM18zODQiOiIzNDljZmVhMGUxZGE0NWZlMmYzNjJhMWFjZjI1ZTczOWNiNGQ0NDdiM2NiODUzZDVkYWNjMzU5ZmRhMWE1M2FhYWU5OTM2ZmFhZWM1NmFhZDkwMThhYjgxMTI4ZjI3N2YiLCJzaGEzXzUxMiI6ImMxNDgwNGY1YTNjNWE4ZGEyMTAyODk1YTFjZGU4MmIwNGYwZmY4OTczMTc0MmY2NDQyY2NmNzQ1OTQzYWQ5NGViOWZmMTNhZDg3YjRmODkxN2M5NmY5ZjMwZjkwYTFhYTI4OTI3OTkwMjg0ZDJhMzcyMjA0NjE4MTNiNDI0MzEyIiwic2hha2VfMTI4IjoiN2IxY2RkMWUyMzUzMzk0OTg5M2UyMmZkMTAwZmU0YjJhMTU1MDJmMTNjMTI0YzhiZDgxY2QwZDdlOWEzMGNmOCIsInNoYWtlXzI1NiI6ImI0NjMzZThhMjNkZDM0ODk0ZTIyNzc0ODYyNTE1MzVjYWFlNjkyMTdmOTQ0NTc3MzE1NTljODBjNWQ3M2ZkOTMxZTFjMDJlZDI0Yjc3MzE3OTJjMjVlNTZhYjg3NjI4YmJiMDgxNTU0MjU2MWY5ZGI2NWE0NDk4NDFmNGQzYTU4In0K",
"eyJibGFrZTNfbXVsdGkiOiI2Y2M0MmU4NGRiOGQyZTliYjA4YjUxNWUwYzlmYzg2NTViNDUwNGRlZDM1MzBlZjFjNTFjZWEwOWUxYThiNGYxIiwiYmxha2UzX3NpbmdsZSI6IjZjYzQyZTg0ZGI4ZDJlOWJiMDhiNTE1ZTBjOWZjODY1NWI0NTA0ZGVkMzUzMGVmMWM1MWNlYTA5ZTFhOGI0ZjEiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiZDQwNjk3NTJhYjQ0NzFhZDliMDY3YmUxMmRjNTM2ZjYiLCJzaGExIjoiOGRjZmVlMjZjZjUyOTllMDBjN2QwZjJiZTc0NmVmMTlkZjliZGExNCIsInNoYTIyNCI6IjhjMzAzOTU3ZjI3NDNiMjUwNmQyYzIzY2VmNmU4MTQ5MTllZmE2MWM0MTFiMDk5ZmMzODc2MmRjIiwic2hhMjU2IjoiZDk3ZjQ2OWJjMWZkMjhjMjZkMjJhN2Y3ODczNzlhZmM4NjY3ZmZmM2FhYTQ5NTE4NmQyZTM4OTU2MTBjZDJmMyIsInNoYTM4NCI6IjY0NmY0YWM0ZDA2YWJkZmE2MDAwN2VjZWNiOWNjOTk4ZmJkOTBiYzYwMmY3NTk2M2RhZDUzMGMzNGE5ZGE1YzY4NjhlMGIwMDJkZDNlMTM4ZjhmMjA2ODcyNzFkMDVjMSIsInNoYTUxMiI6ImYzZTU4NTA0YzYyOGUwYjViNzBhOTYxYThmODA1MDA1NjQ1M2E5NDlmNTgzNDhiYTNhZTVlMjdkNDRhNGJkMjc5ZjA3MmU1OGQ5YjEyOGE1NDc1MTU2ZmM3YzcxMGJkYjI3OWQ5OGFmN2EwYTI4Y2Y1ZDY2MmQxODY4Zjg3ZjI3IiwiYmxha2UyYiI6ImFhNjgyYmJjM2U1ZGRjNDZkNWUxN2VjMzRlNmEzZGY5ZjhiNWQyNzk0YTZkNmY0M2VjODMxZjhjOTU2OGYyY2RiOGE4YjAyNTE4MDA4YmY0Y2FhYTlhY2FhYjNkNzRmZmRiNGZlNDgwOTcwODU3OGJiZjNlNzJjYTc5ZDQwYzZmIiwiYmxha2UycyI6ImQ0ZGJlZTJkMmZlNDMwOGViYTkwMTY1MDdmMzI1ZmJiODZlMWQzNDQ0MjgzNzRlMjAwNjNiNWQ1MzkzZTExNjMiLCJzaGEzXzIyNCI6ImE1ZTM5NWZlNGRlYjIyY2JhNjgwMWFiZTliZjljMjM2YmMzYjkwZDdiN2ZjMTRhZDhjZjQ0NzBlIiwic2hhM18yNTYiOiIwOWYwZGVjODk0OWEzYmQzYzU3N2RjYzUyMTMwMGRiY2UwMjVjM2VjOTJkNzQ0MDJkNTE1ZDA4NTQwODg2NGY1Iiwic2hhM18zODQiOiJmMjEyNmM5NTcxODQ3NDZmNjYyMjE4MTRkMDZkZWQ3NDBhYWU3MDA4MTc0YjI0OTEzY2YwOTQzY2IwMTA5Y2QxNWI4YmMwOGY1YjUwMWYwYzhhOTY4MzUwYzgzY2I1ZWUiLCJzaGEzXzUxMiI6ImU1ZmEwMzIwMzk2YTJjMThjN2UxZjVlZmJiODYwYTU1M2NlMTlkMDQ0MWMxNWEwZTI1M2RiNjJkM2JmNjg0ZDI1OWIxYmQ4OTJkYTcyMDVjYTYyODQ2YzU0YWI1ODYxOTBmNDUxZDlmZmNkNDA5YmU5MzlhNWM1YWIyZDdkM2ZkIiwic2hha2VfMTI4IjoiNGI2MTllM2I4N2U1YTY4OTgxMjk0YzgzMmU0NzljZGI4MWFmODdlZTE4YzM1Zjc5ZjExODY5ZWEzNWUxN2I3MiIsInNoYWtlXzI1NiI6ImYzOWVkNmMxZmQ2NzVmMDg3ODAyYTc4ZTUwYWFkN2ZiYTZiM2QxNzhlZWYzMjRkMTI3ZTZjYmEwMGRjNzkwNTkxNjQ1Y2U1Y2NmMjhjYzVkNWRkODU1OWIzMDMxYTM3ZjE5NjhmYmFhNDQzMmI2ZWU0Yzg3ZWE2YTdkMmE2NWM2In0K",
"eyJibGFrZTNfbXVsdGkiOiJhNDRiZjJkMzVkZDI3OTZlZTI1NmY0MzVkODFhNTdhOGM0MjZhMzM5ZDc3NTVkMmNiMjdmMzU4ZjM0NTM4OWM2IiwiYmxha2UzX3NpbmdsZSI6ImE0NGJmMmQzNWRkMjc5NmVlMjU2ZjQzNWQ4MWE1N2E4YzQyNmEzMzlkNzc1NWQyY2IyN2YzNThmMzQ1Mzg5YzYiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiOGU5OTMzMzEyZjg4NDY4MDg0ZmRiZWNjNDYyMTMxZTgiLCJzaGExIjoiNmI0MmZjZDFmMmQyNzUwYWNkY2JkMTUzMmQ4NjQ5YTM1YWI2NDYzNCIsInNoYTIyNCI6ImQ2Y2E2OTUxNzIzZjdjZjg0NzBjZWRjMmVhNjA2ODNmMWU4NDMzM2Q2NDM2MGIzOWIyMjZlZmQzIiwic2hhMjU2IjoiMDAxNGY5Yzg0YjcwMTFhMGJkNzliNzU0NGVjNzg4NDQzNWQ4ZGY0NmRjMDBiNDk0ZmFkYzA4NWQzNDM1NjI4MyIsInNoYTM4NCI6IjMxODg2OTYxODc4NWY3MWJlM2RlZjkyZDgyNzY2NjBhZGE0MGViYTdkMDk1M2Y0YTc5ODdlMThhNzFlNjBlY2EwY2YyM2YwMjVhMmQ4ZjUyMmNkZGY3MTcxODFhMTQxNSIsInNoYTUxMiI6IjdmZGQxN2NmOWU3ZTBhZDcwMzJjMDg1MTkyYWMxZmQ0ZmFhZjZkNWNlYzAzOTE5ZDk0MmZiZTIyNWNhNmIwZTg0NmQ4ZGI0ZjllYTQ5MjJlMTdhNTg4MTY4YzExMTM1NWZiZDQ1NTlmMmU5NDcwNjAwZWE1MzBhMDdiMzY0YWQwIiwiYmxha2UyYiI6IjI0ZjExZWI5M2VlN2YxOTI5NWZiZGU5MTczMmE0NGJkZGYxOWE1ZTQ4MWNmOWFhMjQ2M2UzNDllYjg0Mzc4ZDBkODFjNzY0YWQ1NTk1YjkxZjQzYzgxODcxNTRlYWU5NTZkY2ZjZTlkMWU2MTZjNTFkZThhZDZjZTBhODcyY2Q0IiwiYmxha2UycyI6IjVkZTUwZDUwMGYwYTBmOGRlMTEwOGE2ZmFkZGM4ODNlMTA3NmQ3MThiNmQxN2E4ZDVkMjgzZDdiNGYzZDU2OGEiLCJzaGEzXzIyNCI6IjFhNTA0OGNlYWZiYjg2ZDc4ZmNiNTI0ZTViYTc4NWQ2ZmY5NzY1ZTNlMzdhZWRjZmYxZGVjNGJhIiwic2hhM18yNTYiOiI0YjA0YjE1NTRmMzRkYTlmMjBmZDczM2IzNDg4NjE0ZWNhM2IwOWU1OTJjOGJlMmM0NjA1NjYyMWU0MjJmZDllIiwic2hhM18zODQiOiI1NjMwYjM2OGQ4MGM1YmM5MTgzM2VmNWM2YWUzOTJhNDE4NTNjYmM2MWJiNTI4ZDE4YWM1OWFjZGZiZWU1YThkMWMyZDE4MTM1ZGI2ZWQ2OTJlODFkZThmYTM3MzkxN2MiLCJzaGEzXzUxMiI6IjA2ODg4MGE1MmNiNDkzODYwZDhjOTVhOTFhZGFmZTYwZGYxODc2ZDhjYjFhNmI3NTU2ZjJjM2Y1NjFmMGYwZjMyZjZhYTA1YmVmN2FhYjQ5OWEwNTM0Zjk0Njc4MDEzODlmNDc0ODFiNzcxMjdjMDFiOGFhOTY4NGJhZGUzYmY2Iiwic2hha2VfMTI4IjoiODlmYTdjNDcwNGI4NGZkMWQ1M2E0MTBlN2ZjMzU3NWRhNmUxMGU1YzkzMjM1NWYyZWEyMWM4NDVhZDBlM2UxOCIsInNoYWtlXzI1NiI6IjE4NGNlMWY2NjdmYmIyODA5NWJhZmVkZTQzNTUzZjhkYzBhNGY1MDQwYWJlMjcxMzkzMzcwNDEyZWFiZTg0ZGJhNjI0Y2ZiZWE4YzUxZDU2YzkwMTM2Mjg2ODgyZmQ0Y2E3MzA3NzZjNWUzODFlYzI5MWYxYTczOTE1MDkyMTFmIn0K",
"eyJibGFrZTNfbXVsdGkiOiJhYjA2YjNmMDliNTExOTAzMTMzMzY5NDE2MTc4ZDk2ZjlkYTc3ZGEwOTgyNDJmN2VlMTVjNTNhNTRkMDZhNWVmIiwiYmxha2UzX3NpbmdsZSI6ImFiMDZiM2YwOWI1MTE5MDMxMzMzNjk0MTYxNzhkOTZmOWRhNzdkYTA5ODI0MmY3ZWUxNWM1M2E1NGQwNmE1ZWYiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiZWY0MjcxYjU3NTQwMjU4NGQ2OTI5ZWJkMGI3Nzk5NzYiLCJzaGExIjoiMzgzNzliYWQzZjZiZjc4MmM4OTgzOGY3YWVkMzRkNDNkMzNlYWM2MSIsInNoYTIyNCI6ImQ5ZDNiMjJkYmZlY2M1NTdlODAzNjg5M2M3ZWE0N2I0NTQzYzM2NzZhMDk4NzMxMzRhNjQ0OWEwIiwic2hhMjU2IjoiMjYxZGI3NmJlMGYxMzdlZWJkYmI5OGRlYWM0ZjcyMDdiOGUxMjdiY2MyZmMwODI5OGVjZDczYjQ3MjYxNjQ1NiIsInNoYTM4NCI6IjMzMjkwYWQxYjlhMmRkYmU0ODY3MWZiMTIxNDdiZWJhNjI4MjA1MDcwY2VkNjNiZTFmNGU5YWRhMjgwYWU2ZjZjNDkzYTY2MDllMGQ2YTIzMWU2ODU5ZmIyNGZhM2FjMCIsInNoYTUxMiI6IjAzMDZhMWI1NmNiYTdjNjJiNTNmNTk4MTAwMTQ3MDQ5ODBhNGRmZTdjZjQ5NTU4ZmMyMmQxZDczZDc5NzJmZTllODk2ZWRjMmEyYTQxYWVjNjRjZjkwZGUwYjI1NGM0MDBlZTU1YzcwZjk3OGVlMzk5NmM2YzhkNTBjYTI4YTdiIiwiYmxha2UyYiI6IjY1MDZhMDg1YWQ5MGZkZjk2NGJmMGE5NTFkZmVkMTllZTc0NGVjY2EyODQzZjQzYTI5NmFjZDM0M2RiODhhMDNlNTlkNmFmMGM1YWJkNTEzMzc4MTQ5Yjg3OTExMTVmODRmMDIyZWM1M2JmNGFjNDZhZDczNWIwMmJlYTM0MDk5IiwiYmxha2UycyI6IjdlZDQ3ZWQxOTg3MTk0YWFmNGIwMjQ3MWFkNTMyMmY3NTE3ZjI0OTcwMDc2Y2NmNDkzMWI0MzYxMDU1NzBlNDAiLCJzaGEzXzIyNCI6Ijk2MGM4MDExOTlhMGUzYWExNjdiNmU2MWVkMzE2ZDUzMDM2Yjk4M2UyOThkNWI5MjZmMDc3NDlhIiwic2hhM18yNTYiOiIzYzdmYWE1ZDE3Zjk2MGYxOTI2ZjNlNGIyZjc1ZjdiOWIyZDQ4NGFhNmEwM2ViOWNlMTI4NmM2OTE2YWEyM2RlIiwic2hhM18zODQiOiI5Y2Y0NDA1NWFjYzFlYjZmMDY1YjRjODcxYTYzNTM1MGE1ZjY0ODQwM2YwYTU0MWEzYzZhNjI3N2ViZjZmYTNjYmM1YmJiNjQwMDE4OGFlMWIxMTI2OGZmMDJiMzYzZDUiLCJzaGEzXzUxMiI6ImEyZDk3ZDRlYjYxM2UwZDViYTc2OTk2MzE2MzcxOGEwNDIxZDkxNTNiNjllYjM5MDRmZjI4ODRhZDdjNGJiYmIwNGY2Nzc1OTA1YmQxNGI2NTJmZTQ1Njg0YmI5MTQ3ZjBkYWViZjAxZjIzY2MzZDhkMjIzMTE0MGUzNjI4NTE5Iiwic2hha2VfMTI4IjoiNjkwMWMwYjg1MTg5ZTkyNTJiODI3MTc5NjE2MjRlMTM0MDQ1ZjlkMmI5MzM0MzVkM2Y0OThiZWIyN2Q3N2JiNSIsInNoYWtlXzI1NiI6ImIwMjA4ZTFkNDVjZWI0ODdiZDUwNzk3MWJiNWI3MjdjN2UyYmE3ZDliNWM2ZTEyYWE5YTNhOTY5YzcyNDRjODIwZDcyNDY1ODhlZWU3Yjk4ZWM1NzhjZWIxNjc3OTkxODljMWRkMmZkMmZmYWM4MWExZDAzZDFiNjMxOGRkMjBiIn0K",
]
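A small usage sketch of the new validator, grounded in the code above; the digest strings here are made-up examples, not real model hashes:

# validate_hash() returns silently for unrecognized or non-matching hashes.
validate_hash("not_a_prefixed_hash")  # no ":" present, returns immediately
validate_hash("sha256:0000000000000000000000000000000000000000000000000000000000000000")
# If the digest matched one of the base64-encoded entries in `hashes`,
# an "Unrecoverable Model Error" exception would be raised instead.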
@@ -31,6 +31,7 @@ from typing_extensions import Annotated, Any, Dict

from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES
from invokeai.app.util.misc import uuid_string
from invokeai.backend.model_hash.hash_validator import validate_hash

from ..raw_model import RawModel

@@ -448,4 +449,6 @@ class ModelConfigFactory(object):
            model.key = key
        if isinstance(model, CheckpointConfigBase) and timestamp is not None:
            model.converted_at = timestamp
        if model:
            validate_hash(model.hash)
        return model  # type: ignore
@@ -285,9 +285,9 @@ class ModelCache(ModelCacheBase[AnyModel]):
            else:
                new_dict: Dict[str, torch.Tensor] = {}
                for k, v in cache_entry.state_dict.items():
                    new_dict[k] = v.to(torch.device(target_device), copy=True)
                    new_dict[k] = v.to(torch.device(target_device), copy=True, non_blocking=True)
                cache_entry.model.load_state_dict(new_dict, assign=True)
            cache_entry.model.to(target_device)
            cache_entry.model.to(target_device, non_blocking=True)
            cache_entry.device = target_device
        except Exception as e:  # blow away cache entry
            self._delete_cache_entry(cache_entry)
@@ -10,7 +10,7 @@ from picklescan.scanner import scan_file_path

import invokeai.backend.util.logging as logger
from invokeai.app.util.misc import uuid_string
from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
from invokeai.backend.util.util import SilenceWarnings
from invokeai.backend.util.silence_warnings import SilenceWarnings

from .config import (
    AnyModelConfig,
@@ -67,7 +67,7 @@ class ModelPatcher:
        unet: UNet2DConditionModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        model_state_dict: Optional[Dict[str, torch.Tensor]] = None,
    ) -> None:
    ) -> Generator[None, None, None]:
        with cls.apply_lora(
            unet,
            loras=loras,
@@ -83,7 +83,7 @@ class ModelPatcher:
        text_encoder: CLIPTextModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        model_state_dict: Optional[Dict[str, torch.Tensor]] = None,
    ) -> None:
    ) -> Generator[None, None, None]:
        with cls.apply_lora(text_encoder, loras=loras, prefix="lora_te_", model_state_dict=model_state_dict):
            yield

@@ -95,7 +95,7 @@ class ModelPatcher:
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        prefix: str,
        model_state_dict: Optional[Dict[str, torch.Tensor]] = None,
    ) -> Generator[Any, None, None]:
    ) -> Generator[None, None, None]:
        """
        Apply one or more LoRAs to a model.

@@ -139,12 +139,12 @@ class ModelPatcher:
                    # We intentionally move to the target device first, then cast. Experimentally, this was found to
                    # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                    # same thing in a single call to '.to(...)'.
                    layer.to(device=device)
                    layer.to(dtype=torch.float32)
                    layer.to(device=device, non_blocking=True)
                    layer.to(dtype=torch.float32, non_blocking=True)
                    # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                    # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                    layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
                    layer.to(device=torch.device("cpu"))
                    layer.to(device=torch.device("cpu"), non_blocking=True)

                    assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                    if module.weight.shape != layer_weight.shape:
@@ -153,7 +153,7 @@ class ModelPatcher:
                        layer_weight = layer_weight.reshape(module.weight.shape)

                    assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                    module.weight += layer_weight.to(dtype=dtype)
                    module.weight += layer_weight.to(dtype=dtype, non_blocking=True)

            yield  # wait for context manager exit

@@ -161,7 +161,7 @@ class ModelPatcher:
            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
            with torch.no_grad():
                for module_key, weight in original_weights.items():
                    model.get_submodule(module_key).weight.copy_(weight)
                    model.get_submodule(module_key).weight.copy_(weight, non_blocking=True)

    @classmethod
    @contextmanager
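These hunks keep the existing patch-then-restore contract while adding `non_blocking` transfers: the original weight is stashed, the scaled LoRA weight is added in place, and the stash is copied back on context exit. A stripped-down sketch of that pattern, assuming a plain `torch.nn.Linear` instead of the real UNet or text-encoder submodules:

import torch
from contextlib import contextmanager

@contextmanager
def patch_weight(module: torch.nn.Linear, delta: torch.Tensor, scale: float = 1.0):
    original = module.weight.detach().clone()  # stash the unpatched weight
    try:
        with torch.no_grad():
            module.weight += delta.to(dtype=module.weight.dtype) * scale
        yield
    finally:
        with torch.no_grad():
            module.weight.copy_(original)  # restore on exit, even after errors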
@@ -6,6 +6,7 @@ from typing import Any, List, Optional, Tuple, Union

import numpy as np
import onnx
import torch
from onnx import numpy_helper
from onnxruntime import InferenceSession, SessionOptions, get_available_providers

@@ -188,6 +189,15 @@ class IAIOnnxRuntimeModel(RawModel):
        # return self.io_binding.copy_outputs_to_cpu()
        return self.session.run(None, inputs)

    # compatability with RawModel ABC
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        pass

    # compatability with diffusers load code
    @classmethod
    def from_pretrained(
@@ -10,6 +10,20 @@ The term 'raw' was introduced to describe a wrapper around a torch.nn.Module
that adds additional methods and attributes.
"""

from abc import ABC, abstractmethod
from typing import Optional

class RawModel:
    """Base class for 'Raw' model wrappers."""
import torch


class RawModel(ABC):
    """Abstract base class for 'Raw' model wrappers."""

    @abstractmethod
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        pass
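Making `RawModel` an ABC with an abstract `to()` forces every wrapper (LoRA, IP-Adapter, ONNX, textual inversion) to expose the same movement API. A minimal hypothetical subclass that satisfies the contract, for illustration only:

from typing import Optional
import torch

class DummyRawModel(RawModel):  # hypothetical example, not part of the diff
    def __init__(self, weight: torch.Tensor):
        self.weight = weight

    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        non_blocking: bool = False,
    ) -> None:
        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)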
@@ -10,12 +10,11 @@ import PIL.Image
import psutil
import torch
import torchvision.transforms as T
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.controlnet import ControlNetModel
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.schedulers.scheduling_utils import SchedulerMixin
from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
from diffusers.utils.import_utils import is_xformers_available
from pydantic import Field
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
@@ -26,6 +25,7 @@ from invokeai.backend.stable_diffusion.diffusion.shared_invokeai_diffusion impor
from invokeai.backend.stable_diffusion.diffusion.unet_attention_patcher import UNetAttentionPatcher, UNetIPAdapterData
from invokeai.backend.util.attention import auto_detect_slice_size
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.hotfixes import ControlNetModel


@dataclass
@@ -38,56 +38,18 @@ class PipelineIntermediateState:
    predicted_original: Optional[torch.Tensor] = None


@dataclass
class AddsMaskLatents:
    """Add the channels required for inpainting model input.

    The inpainting model takes the normal latent channels as input, _plus_ a one-channel mask
    and the latent encoding of the base image.

    This class assumes the same mask and base image should apply to all items in the batch.
    """

    forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]
    mask: torch.Tensor
    initial_image_latents: torch.Tensor

    def __call__(
        self,
        latents: torch.Tensor,
        t: torch.Tensor,
        text_embeddings: torch.Tensor,
        **kwargs,
    ) -> torch.Tensor:
        model_input = self.add_mask_channels(latents)
        return self.forward(model_input, t, text_embeddings, **kwargs)

    def add_mask_channels(self, latents):
        batch_size = latents.size(0)
        # duplicate mask and latents for each batch
        mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
        image_latents = einops.repeat(self.initial_image_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
        # add mask and image as additional channels
        model_input, _ = einops.pack([latents, mask, image_latents], "b * h w")
        return model_input


def are_like_tensors(a: torch.Tensor, b: object) -> bool:
    return isinstance(b, torch.Tensor) and (a.size() == b.size())


@dataclass
class AddsMaskGuidance:
    mask: torch.FloatTensor
    mask_latents: torch.FloatTensor
    mask: torch.Tensor
    mask_latents: torch.Tensor
    scheduler: SchedulerMixin
    noise: torch.Tensor
    gradient_mask: bool
    is_gradient_mask: bool

    def __call__(self, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        return self.apply_mask(latents, t)

    def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor:
    def apply_mask(self, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        batch_size = latents.size(0)
        mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
        if t.dim() == 0:
@@ -100,7 +62,7 @@ class AddsMaskGuidance:
        # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
        # mask_latents = self.scheduler.scale_model_input(mask_latents, t)
        mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
        if self.gradient_mask:
        if self.is_gradient_mask:
            threshhold = (t.item()) / self.scheduler.config.num_train_timesteps
            mask_bool = mask > threshhold  # I don't know when mask got inverted, but it did
            masked_input = torch.where(mask_bool, latents, mask_latents)
||||
safety_checker: Optional[StableDiffusionSafetyChecker],
|
||||
feature_extractor: Optional[CLIPFeatureExtractor],
|
||||
requires_safety_checker: bool = False,
|
||||
control_model: ControlNetModel = None,
|
||||
):
|
||||
super().__init__(
|
||||
vae=vae,
|
||||
@@ -214,8 +175,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
)
|
||||
|
||||
self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
|
||||
self.control_model = control_model
|
||||
self.use_ip_adapter = False
|
||||
|
||||
def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
|
||||
"""
|
||||
@@ -280,116 +239,131 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
    def to(self, torch_device: Optional[Union[str, torch.device]] = None, silence_dtype_warnings=False):
        raise Exception("Should not be called")

    def add_inpainting_channels_to_latents(
        self, latents: torch.Tensor, masked_ref_image_latents: torch.Tensor, inpainting_mask: torch.Tensor
    ):
        """Given a `latents` tensor, adds the mask and image latents channels required for inpainting.

        Standard (non-inpainting) SD UNet models expect an input with shape (N, 4, H, W). Inpainting models expect an
        input of shape (N, 9, H, W). The 9 channels are defined as follows:
        - Channel 0-3: The latents being denoised.
        - Channel 4: The mask indicating which parts of the image are being inpainted.
        - Channel 5-8: The latent representation of the masked reference image being inpainted.

        This function assumes that the same mask and base image should apply to all items in the batch.
        """
        # Validate assumptions about input tensor shapes.
        batch_size, latent_channels, latent_height, latent_width = latents.shape
        assert latent_channels == 4
        assert masked_ref_image_latents.shape == [1, 4, latent_height, latent_width]
        assert inpainting_mask == [1, 1, latent_height, latent_width]

        # Repeat original_image_latents and inpainting_mask to match the latents batch size.
        original_image_latents = masked_ref_image_latents.expand(batch_size, -1, -1, -1)
        inpainting_mask = inpainting_mask.expand(batch_size, -1, -1, -1)

        # Concatenate along the channel dimension.
        return torch.cat([latents, inpainting_mask, original_image_latents], dim=1)
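The 9-channel layout described in the docstring above can be checked with a tiny standalone example; the tensors here are dummies with the documented shapes:

import torch

latents = torch.zeros(2, 4, 64, 64)  # (N, 4, H, W) latents being denoised
mask = torch.ones(1, 1, 64, 64)      # channel 4: inpainting mask
ref = torch.zeros(1, 4, 64, 64)      # channels 5-8: masked reference image latents

model_input = torch.cat(
    [latents, mask.expand(2, -1, -1, -1), ref.expand(2, -1, -1, -1)], dim=1
)
assert model_input.shape == (2, 9, 64, 64)  # inpainting UNets expect 9 channels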
    def latents_from_embeddings(
        self,
        latents: torch.Tensor,
        num_inference_steps: int,
        scheduler_step_kwargs: dict[str, Any],
        conditioning_data: TextConditioningData,
        *,
        noise: Optional[torch.Tensor],
        seed: int,
        timesteps: torch.Tensor,
        init_timestep: torch.Tensor,
        additional_guidance: List[Callable] = None,
        callback: Callable[[PipelineIntermediateState], None] = None,
        control_data: List[ControlNetData] = None,
        callback: Callable[[PipelineIntermediateState], None],
        control_data: list[ControlNetData] | None = None,
        ip_adapter_data: Optional[list[IPAdapterData]] = None,
        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
        mask: Optional[torch.Tensor] = None,
        masked_latents: Optional[torch.Tensor] = None,
        gradient_mask: Optional[bool] = False,
        seed: int,
        is_gradient_mask: bool = False,
    ) -> torch.Tensor:
        if init_timestep.shape[0] == 0:
            return latents
        """Denoise the latents.

        if additional_guidance is None:
            additional_guidance = []
        Args:
            latents: The latent-space image to denoise.
                - If we are inpainting, this is the initial latent image before noise has been added.
                - If we are generating a new image, this should be initialized to zeros.
                - In some cases, this may be a partially-noised latent image (e.g. when running the SDXL refiner).
            scheduler_step_kwargs: kwargs forwarded to the scheduler.step() method.
            conditioning_data: Text conditionging data.
            noise: Noise used for two purposes:
                1. Used by the scheduler to noise the initial `latents` before denoising.
                2. Used to noise the `masked_latents` when inpainting.
                `noise` should be None if the `latents` tensor has already been noised.
            seed: The seed used to generate the noise for the denoising process.
                HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate
                the same noise used earlier in the pipeline. This should really be handled in a clearer way.
            timesteps: The timestep schedule for the denoising process.
            init_timestep: The first timestep in the schedule.
                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
                case, and remove this duplicate param.
            callback: A callback function that is called to report progress during the denoising process.
            control_data: ControlNet data.
            ip_adapter_data: IP-Adapter data.
            t2i_adapter_data: T2I-Adapter data.
            mask: A mask indicating which parts of the image are being inpainted. The presence of mask is used to
                determine whether we are inpainting or not. `mask` should have the same spatial dimensions as the
                `latents` tensor.
                TODO(ryand): Check and document the expected dtype, range, and values used to represent
                foreground/background.
            masked_latents: A latent-space representation of a masked inpainting reference image. This tensor is only
                used if an *inpainting* model is being used i.e. this tensor is not used when inpainting with a
                standard SD UNet model.
            is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
        """
        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
        # cases where densoisings_start and denoising_end are set such that there are no timesteps.
        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
            return latents

        orig_latents = latents.clone()

        batch_size = latents.shape[0]
        batched_t = init_timestep.expand(batch_size)
        batched_init_timestep = init_timestep.expand(batch_size)

        # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
        if noise is not None:
            # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
            # full noise. Investigate the history of why this got commented out.
            # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
            latents = self.scheduler.add_noise(latents, noise, batched_t)
            latents = self.scheduler.add_noise(latents, noise, batched_init_timestep)

        if mask is not None:
            if is_inpainting_model(self.unet):
                if masked_latents is None:
                    raise Exception("Source image required for inpaint mask when inpaint model used!")

                self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
                    self._unet_forward, mask, masked_latents
                )
            else:
                # if no noise provided, noisify unmasked area based on seed
                if noise is None:
                    noise = torch.randn(
                        orig_latents.shape,
                        dtype=torch.float32,
                        device="cpu",
                        generator=torch.Generator(device="cpu").manual_seed(seed),
                    ).to(device=orig_latents.device, dtype=orig_latents.dtype)

                additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise, gradient_mask))

        try:
            latents = self.generate_latents_from_embeddings(
                latents,
                timesteps,
                conditioning_data,
                scheduler_step_kwargs=scheduler_step_kwargs,
                additional_guidance=additional_guidance,
                control_data=control_data,
                ip_adapter_data=ip_adapter_data,
                t2i_adapter_data=t2i_adapter_data,
                callback=callback,
            )
        finally:
            self.invokeai_diffuser.model_forward_callback = self._unet_forward

        # restore unmasked part after the last step is completed
        # in-process masking happens before each step
        if mask is not None:
            if gradient_mask:
                latents = torch.where(mask > 0, latents, orig_latents)
            else:
                latents = torch.lerp(
                    orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
                )

        return latents
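The seed handling noted in the docstring (re-creating the same noise when `noise is None`) relies on generating on the CPU with a fixed `torch.Generator`, which gives reproducible values regardless of the target device. A small sketch of that property, with a hypothetical helper name:

import torch

def noise_like(shape: tuple[int, ...], seed: int, device: torch.device) -> torch.Tensor:
    # Generate on CPU with a seeded generator, then move to where the latents live.
    g = torch.Generator(device="cpu").manual_seed(seed)
    return torch.randn(shape, dtype=torch.float32, device="cpu", generator=g).to(device)

assert torch.equal(noise_like((1, 4, 8, 8), 42, torch.device("cpu")),
                   noise_like((1, 4, 8, 8), 42, torch.device("cpu")))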
    def generate_latents_from_embeddings(
        self,
        latents: torch.Tensor,
        timesteps,
        conditioning_data: TextConditioningData,
        scheduler_step_kwargs: dict[str, Any],
        *,
        additional_guidance: List[Callable] = None,
        control_data: List[ControlNetData] = None,
        ip_adapter_data: Optional[list[IPAdapterData]] = None,
        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
        callback: Callable[[PipelineIntermediateState], None] = None,
    ) -> torch.Tensor:
        self._adjust_memory_efficient_attention(latents)
        if additional_guidance is None:
            additional_guidance = []

        batch_size = latents.shape[0]
        # Handle mask guidance (a.k.a. inpainting).
        mask_guidance: AddsMaskGuidance | None = None
        if mask is not None and not is_inpainting_model(self.unet):
            # We are doing inpainting, since a mask is provided, but we are not using an inpainting model, so we will
            # apply mask guidance to the latents.

        if timesteps.shape[0] == 0:
            return latents
            # 'noise' might be None if the latents have already been noised (e.g. when running the SDXL refiner).
            # We still need noise for inpainting, so we generate it from the seed here.
            if noise is None:
                noise = torch.randn(
                    orig_latents.shape,
                    dtype=torch.float32,
                    device="cpu",
                    generator=torch.Generator(device="cpu").manual_seed(seed),
                ).to(device=orig_latents.device, dtype=orig_latents.dtype)

            mask_guidance = AddsMaskGuidance(
                mask=mask,
                mask_latents=orig_latents,
                scheduler=self.scheduler,
                noise=noise,
                is_gradient_mask=is_gradient_mask,
            )

        use_ip_adapter = ip_adapter_data is not None
        use_regional_prompting = (
            conditioning_data.cond_regions is not None or conditioning_data.uncond_regions is not None
        )
        unet_attention_patcher = None
        self.use_ip_adapter = use_ip_adapter
        attn_ctx = nullcontext()

        if use_ip_adapter or use_regional_prompting:
@@ -402,28 +376,28 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
            attn_ctx = unet_attention_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model)

        with attn_ctx:
            if callback is not None:
                callback(
                    PipelineIntermediateState(
                        step=-1,
                        order=self.scheduler.order,
                        total_steps=len(timesteps),
                        timestep=self.scheduler.config.num_train_timesteps,
                        latents=latents,
                    )
            callback(
                PipelineIntermediateState(
                    step=-1,
                    order=self.scheduler.order,
                    total_steps=len(timesteps),
                    timestep=self.scheduler.config.num_train_timesteps,
                    latents=latents,
                )
            )

            # print("timesteps:", timesteps)
            for i, t in enumerate(self.progress_bar(timesteps)):
                batched_t = t.expand(batch_size)
                step_output = self.step(
                    batched_t,
                    latents,
                    conditioning_data,
                    t=batched_t,
                    latents=latents,
                    conditioning_data=conditioning_data,
                    step_index=i,
                    total_step_count=len(timesteps),
                    scheduler_step_kwargs=scheduler_step_kwargs,
                    additional_guidance=additional_guidance,
                    mask_guidance=mask_guidance,
                    mask=mask,
                    masked_latents=masked_latents,
                    control_data=control_data,
                    ip_adapter_data=ip_adapter_data,
                    t2i_adapter_data=t2i_adapter_data,
@@ -431,19 +405,28 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                latents = step_output.prev_sample
                predicted_original = getattr(step_output, "pred_original_sample", None)

                if callback is not None:
                    callback(
                        PipelineIntermediateState(
                            step=i,
                            order=self.scheduler.order,
                            total_steps=len(timesteps),
                            timestep=int(t),
                            latents=latents,
                            predicted_original=predicted_original,
                        )
                callback(
                    PipelineIntermediateState(
                        step=i,
                        order=self.scheduler.order,
                        total_steps=len(timesteps),
                        timestep=int(t),
                        latents=latents,
                        predicted_original=predicted_original,
                    )
                )

        return latents
        # restore unmasked part after the last step is completed
        # in-process masking happens before each step
        if mask is not None:
            if is_gradient_mask:
                latents = torch.where(mask > 0, latents, orig_latents)
            else:
                latents = torch.lerp(
                    orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
                )

        return latents

    @torch.inference_mode()
    def step(
@@ -454,19 +437,20 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        step_index: int,
        total_step_count: int,
        scheduler_step_kwargs: dict[str, Any],
        additional_guidance: List[Callable] = None,
        control_data: List[ControlNetData] = None,
        mask_guidance: AddsMaskGuidance | None,
        mask: torch.Tensor | None,
        masked_latents: torch.Tensor | None,
        control_data: list[ControlNetData] | None = None,
        ip_adapter_data: Optional[list[IPAdapterData]] = None,
        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
    ):
        # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
        timestep = t[0]
        if additional_guidance is None:
            additional_guidance = []

        # one day we will expand this extension point, but for now it just does denoise masking
        for guidance in additional_guidance:
            latents = guidance(latents, timestep)
        # Handle masked image-to-image (a.k.a inpainting).
        if mask_guidance is not None:
            # NOTE: This is intentionally done *before* self.scheduler.scale_model_input(...).
            latents = mask_guidance(latents, timestep)

        # TODO: should this scaling happen here or inside self._unet_forward?
        # i.e. before or after passing it to InvokeAIDiffuserComponent
@@ -514,6 +498,31 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):

            down_intrablock_additional_residuals = accum_adapter_state

        # Handle inpainting models.
        if is_inpainting_model(self.unet):
            # NOTE: These calls to add_inpainting_channels_to_latents(...) are intentionally done *after*
            # self.scheduler.scale_model_input(...) so that the scaling is not applied to the mask or reference image
            # latents.
            if mask is not None:
                if masked_latents is None:
                    raise ValueError("Source image required for inpaint mask when inpaint model used!")
                latent_model_input = self.add_inpainting_channels_to_latents(
                    latents=latent_model_input, masked_ref_image_latents=masked_latents, inpainting_mask=mask
                )
            else:
                # We are using an inpainting model, but no mask was provided, so we are not really "inpainting".
                # We generate a global mask and empty original image so that we can still generate in this
                # configuration.
                # TODO(ryand): Should we just raise an exception here instead? I can't think of a use case for wanting
                # to do this.
                # TODO(ryand): If we decide that there is a good reason to keep this, then we should generate the 'fake'
                # mask and original image once rather than on every denoising step.
                latent_model_input = self.add_inpainting_channels_to_latents(
                    latents=latent_model_input,
                    masked_ref_image_latents=torch.zeros_like(latent_model_input[:1]),
                    inpainting_mask=torch.ones_like(latent_model_input[:1, :1]),
                )

        uc_noise_pred, c_noise_pred = self.invokeai_diffuser.do_unet_step(
            sample=latent_model_input,
            timestep=t,  # TODO: debug how handled batched and non batched timesteps
@@ -542,17 +551,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        # compute the previous noisy sample x_t -> x_t-1
        step_output = self.scheduler.step(noise_pred, timestep, latents, **scheduler_step_kwargs)

        # TODO: discuss injection point options. For now this is a patch to get progress images working with inpainting again.
        for guidance in additional_guidance:
            # apply the mask to any "denoised" or "pred_original_sample" fields
        # TODO: discuss injection point options. For now this is a patch to get progress images working with inpainting
        # again.
        if mask_guidance is not None:
            # Apply the mask to any "denoised" or "pred_original_sample" fields.
            if hasattr(step_output, "denoised"):
                step_output.pred_original_sample = guidance(step_output.denoised, self.scheduler.timesteps[-1])
                step_output.pred_original_sample = mask_guidance(step_output.denoised, self.scheduler.timesteps[-1])
            elif hasattr(step_output, "pred_original_sample"):
                step_output.pred_original_sample = guidance(
                step_output.pred_original_sample = mask_guidance(
                    step_output.pred_original_sample, self.scheduler.timesteps[-1]
                )
            else:
                step_output.pred_original_sample = guidance(latents, self.scheduler.timesteps[-1])
                step_output.pred_original_sample = mask_guidance(latents, self.scheduler.timesteps[-1])

        return step_output
@@ -575,17 +585,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        **kwargs,
    ):
        """predict the noise residual"""
        if is_inpainting_model(self.unet) and latents.size(1) == 4:
            # Pad out normal non-inpainting inputs for an inpainting model.
            # FIXME: There are too many layers of functions and we have too many different ways of
            # overriding things! This should get handled in a way more consistent with the other
            # use of AddsMaskLatents.
            latents = AddsMaskLatents(
                self._unet_forward,
                mask=torch.ones_like(latents[:1, :1], device=latents.device, dtype=latents.dtype),
                initial_image_latents=torch.zeros_like(latents[:1], device=latents.device, dtype=latents.dtype),
            ).add_mask_channels(latents)

        # First three args should be positional, not keywords, so torch hooks can see them.
        return self.unet(
            latents,
invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py (new file, 242 lines)
@@ -0,0 +1,242 @@
from __future__ import annotations

import copy
from dataclasses import dataclass
from typing import Any, Callable, Optional

import torch
from diffusers.schedulers.scheduling_utils import SchedulerMixin

from invokeai.backend.stable_diffusion.diffusers_pipeline import (
    ControlNetData,
    PipelineIntermediateState,
    StableDiffusionGeneratorPipeline,
)
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import TextConditioningData
from invokeai.backend.tiles.utils import TBLR

# The maximum number of regions with compatible sizes that will be batched together.
# Larger batch sizes improve speed, but require more device memory.
MAX_REGION_BATCH_SIZE = 4


@dataclass
class MultiDiffusionRegionConditioning:
    # Region coords in latent space.
    region: TBLR
    text_conditioning_data: TextConditioningData
    control_data: list[ControlNetData]


class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
    """A Stable Diffusion pipeline that uses Multi-Diffusion (https://arxiv.org/pdf/2302.08113) for denoising."""

    def _split_into_region_batches(
        self, multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning]
    ) -> list[list[MultiDiffusionRegionConditioning]]:
        # Group the regions by shape. Only regions with the same shape can be batched together.
        conditioning_by_shape: dict[tuple[int, int], list[MultiDiffusionRegionConditioning]] = {}
        for region_conditioning in multi_diffusion_conditioning:
            shape_hw = (
                region_conditioning.region.bottom - region_conditioning.region.top,
                region_conditioning.region.right - region_conditioning.region.left,
            )
            # In python, a tuple of hashable objects is hashable, so can be used as a key in a dict.
            if shape_hw not in conditioning_by_shape:
                conditioning_by_shape[shape_hw] = []
            conditioning_by_shape[shape_hw].append(region_conditioning)

        # Split the regions into batches, respecting the MAX_REGION_BATCH_SIZE constraint.
        region_conditioning_batches = []
        for region_conditioning_batch in conditioning_by_shape.values():
            for i in range(0, len(region_conditioning_batch), MAX_REGION_BATCH_SIZE):
                region_conditioning_batches.append(region_conditioning_batch[i : i + MAX_REGION_BATCH_SIZE])

        return region_conditioning_batches

    def _check_regional_prompting(self, multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning]):
        """Check the input conditioning and confirm that regional prompting is not used."""
        for region_conditioning in multi_diffusion_conditioning:
            if (
                region_conditioning.text_conditioning_data.cond_regions is not None
                or region_conditioning.text_conditioning_data.uncond_regions is not None
            ):
                raise NotImplementedError("Regional prompting is not yet supported in Multi-Diffusion.")
    def multi_diffusion_denoise(
        self,
        multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning],
        latents: torch.Tensor,
        scheduler_step_kwargs: dict[str, Any],
        noise: Optional[torch.Tensor],
        timesteps: torch.Tensor,
        init_timestep: torch.Tensor,
        callback: Callable[[PipelineIntermediateState], None],
    ) -> torch.Tensor:
        self._check_regional_prompting(multi_diffusion_conditioning)

        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
        # cases where densoisings_start and denoising_end are set such that there are no timesteps.
        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
            return latents

        batch_size, _, latent_height, latent_width = latents.shape
        batched_init_timestep = init_timestep.expand(batch_size)

        # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
        if noise is not None:
            # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
            # full noise. Investigate the history of why this got commented out.
            # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
            latents = self.scheduler.add_noise(latents, noise, batched_init_timestep)

        # TODO(ryand): Look into the implications of passing in latents here that are larger than they will be after
        # cropping into regions.
        self._adjust_memory_efficient_attention(latents)

        # Populate a weighted mask that will be used to combine the results from each region after every step.
        # For now, we assume that each region has the same weight (1.0).
        region_weight_mask = torch.zeros(
            (1, 1, latent_height, latent_width), device=latents.device, dtype=latents.dtype
        )
        for region_conditioning in multi_diffusion_conditioning:
            region = region_conditioning.region
            region_weight_mask[:, :, region.top : region.bottom, region.left : region.right] += 1.0

        # Group the region conditioning into batches for faster processing.
        # region_conditioning_batches[b][r] is the r'th region in the b'th batch.
        region_conditioning_batches = self._split_into_region_batches(multi_diffusion_conditioning)

        # Many of the diffusers schedulers are stateful (i.e. they update internal state in each call to step()). Since
        # we are calling step() multiple times at the same timestep (once for each region batch), we must maintain a
        # separate scheduler state for each region batch.
        region_batch_schedulers: list[SchedulerMixin] = [
            copy.deepcopy(self.scheduler) for _ in region_conditioning_batches
        ]

        callback(
            PipelineIntermediateState(
                step=-1,
                order=self.scheduler.order,
                total_steps=len(timesteps),
                timestep=self.scheduler.config.num_train_timesteps,
                latents=latents,
            )
        )

        for i, t in enumerate(self.progress_bar(timesteps)):
            batched_t = t.expand(batch_size)

            merged_latents = torch.zeros_like(latents)
            merged_pred_original: torch.Tensor | None = None
            for region_batch_idx, region_conditioning_batch in enumerate(region_conditioning_batches):
                # Switch to the scheduler for the region batch.
                self.scheduler = region_batch_schedulers[region_batch_idx]

                # TODO(ryand): This logic has not yet been tested with input latents with a batch_size > 1.

                # Prepare the latents for the region batch.
                batch_latents = torch.cat(
                    [
                        latents[
                            :,
                            :,
                            region_conditioning.region.top : region_conditioning.region.bottom,
                            region_conditioning.region.left : region_conditioning.region.right,
                        ]
                        for region_conditioning in region_conditioning_batch
                    ],
                )

                # TODO(ryand): Do we have to repeat the text_conditioning_data to match the batch size? Or does step()
                # handle broadcasting properly?

                # TODO(ryand): Resume here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                # Run the denoising step on the region.
                step_output = self.step(
                    t=batched_t,
                    latents=batch_latents,
                    conditioning_data=region_conditioning.text_conditioning_data,
                    step_index=i,
                    total_step_count=total_step_count,
                    scheduler_step_kwargs=scheduler_step_kwargs,
                    mask_guidance=None,
                    mask=None,
                    masked_latents=None,
                    control_data=region_conditioning.control_data,
                )
                # Run a denoising step on the region.
                # step_output = self._region_step(
                #     region_conditioning=region_conditioning,
                #     t=batched_t,
                #     latents=latents,
                #     step_index=i,
                #     total_step_count=len(timesteps),
                #     scheduler_step_kwargs=scheduler_step_kwargs,
                # )

                # Store the results from the region.
                region = region_conditioning.region
                merged_latents[:, :, region.top : region.bottom, region.left : region.right] += step_output.prev_sample
                pred_orig_sample = getattr(step_output, "pred_original_sample", None)
                if pred_orig_sample is not None:
                    # If one region has pred_original_sample, then we can assume that all regions will have it, because
                    # they all use the same scheduler.
                    if merged_pred_original is None:
                        merged_pred_original = torch.zeros_like(latents)
                    merged_pred_original[:, :, region.top : region.bottom, region.left : region.right] += (
                        pred_orig_sample
                    )

            # Normalize the merged results.
            latents = torch.where(region_weight_mask > 0, merged_latents / region_weight_mask, merged_latents)
            predicted_original = None
            if merged_pred_original is not None:
                predicted_original = torch.where(
                    region_weight_mask > 0, merged_pred_original / region_weight_mask, merged_pred_original
                )

            callback(
                PipelineIntermediateState(
                    step=i,
                    order=self.scheduler.order,
                    total_steps=len(timesteps),
                    timestep=int(t),
                    latents=latents,
                    predicted_original=predicted_original,
                )
            )

        return latents
@torch.inference_mode()
|
||||
def _region_batch_step(
|
||||
self,
|
||||
region_conditioning: MultiDiffusionRegionConditioning,
|
||||
t: torch.Tensor,
|
||||
latents: torch.Tensor,
|
||||
step_index: int,
|
||||
total_step_count: int,
|
||||
scheduler_step_kwargs: dict[str, Any],
|
||||
):
|
||||
# Crop the inputs to the region.
|
||||
region_latents = latents[
|
||||
:,
|
||||
:,
|
||||
region_conditioning.region.top : region_conditioning.region.bottom,
|
||||
region_conditioning.region.left : region_conditioning.region.right,
|
||||
]
|
||||
|
||||
# Run the denoising step on the region.
|
||||
return self.step(
|
||||
t=t,
|
||||
latents=region_latents,
|
||||
conditioning_data=region_conditioning.text_conditioning_data,
|
||||
step_index=step_index,
|
||||
total_step_count=total_step_count,
|
||||
scheduler_step_kwargs=scheduler_step_kwargs,
|
||||
mask_guidance=None,
|
||||
mask=None,
|
||||
masked_latents=None,
|
||||
control_data=region_conditioning.control_data,
|
||||
)
|
||||
@@ -65,6 +65,18 @@ class TextualInversionModelRaw(RawModel):
|
||||
|
||||
return result
|
||||
|
||||
def to(
|
||||
self,
|
||||
device: Optional[torch.device] = None,
|
||||
dtype: Optional[torch.dtype] = None,
|
||||
non_blocking: bool = False,
|
||||
) -> None:
|
||||
if not torch.cuda.is_available():
|
||||
return
|
||||
for emb in [self.embedding, self.embedding_2]:
|
||||
if emb is not None:
|
||||
emb.to(device=device, dtype=dtype, non_blocking=non_blocking)
|
||||
|
||||
|
||||
class TextualInversionManager(BaseTextualInversionManager):
|
||||
"""TextualInversionManager implements the BaseTextualInversionManager ABC from the compel library."""
|
||||
|
||||
@@ -1,29 +1,36 @@
|
||||
"""Context class to silence transformers and diffusers warnings."""
|
||||
|
||||
import warnings
|
||||
from typing import Any
|
||||
from contextlib import ContextDecorator
|
||||
|
||||
from diffusers import logging as diffusers_logging
|
||||
from diffusers.utils import logging as diffusers_logging
|
||||
from transformers import logging as transformers_logging
|
||||
|
||||
|
||||
class SilenceWarnings(object):
|
||||
"""Use in context to temporarily turn off warnings from transformers & diffusers modules.
|
||||
# Inherit from ContextDecorator to allow using SilenceWarnings as both a context manager and a decorator.
|
||||
class SilenceWarnings(ContextDecorator):
|
||||
"""A context manager that disables warnings from transformers & diffusers modules while active.
|
||||
|
||||
As context manager:
|
||||
```
|
||||
with SilenceWarnings():
|
||||
# do something
|
||||
```
|
||||
|
||||
As decorator:
|
||||
```
|
||||
@SilenceWarnings()
|
||||
def some_function():
|
||||
# do something
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.transformers_verbosity = transformers_logging.get_verbosity()
|
||||
self.diffusers_verbosity = diffusers_logging.get_verbosity()
|
||||
|
||||
def __enter__(self) -> None:
|
||||
self._transformers_verbosity = transformers_logging.get_verbosity()
|
||||
self._diffusers_verbosity = diffusers_logging.get_verbosity()
|
||||
transformers_logging.set_verbosity_error()
|
||||
diffusers_logging.set_verbosity_error()
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
def __exit__(self, *args: Any) -> None:
|
||||
transformers_logging.set_verbosity(self.transformers_verbosity)
|
||||
diffusers_logging.set_verbosity(self.diffusers_verbosity)
|
||||
def __exit__(self, *args) -> None:
|
||||
transformers_logging.set_verbosity(self._transformers_verbosity)
|
||||
diffusers_logging.set_verbosity(self._diffusers_verbosity)
|
||||
warnings.simplefilter("default")
|
||||
|
||||
@@ -3,12 +3,9 @@ import io
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
from diffusers import logging as diffusers_logging
|
||||
from PIL import Image
|
||||
from transformers import logging as transformers_logging
|
||||
|
||||
# actual size of a gig
|
||||
GIG = 1073741824
|
||||
@@ -80,21 +77,3 @@ class Chdir(object):
|
||||
|
||||
def __exit__(self, *args):
|
||||
os.chdir(self.original)
|
||||
|
||||
|
||||
class SilenceWarnings(object):
|
||||
"""Context manager to temporarily lower verbosity of diffusers & transformers warning messages."""
|
||||
|
||||
def __enter__(self):
|
||||
"""Set verbosity to error."""
|
||||
self.transformers_verbosity = transformers_logging.get_verbosity()
|
||||
self.diffusers_verbosity = diffusers_logging.get_verbosity()
|
||||
transformers_logging.set_verbosity_error()
|
||||
diffusers_logging.set_verbosity_error()
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
"""Restore logger verbosity to state before context was entered."""
|
||||
transformers_logging.set_verbosity(self.transformers_verbosity)
|
||||
diffusers_logging.set_verbosity(self.diffusers_verbosity)
|
||||
warnings.simplefilter("default")
|
||||
|
||||
@@ -5,43 +5,122 @@ import {
|
||||
socketModelInstallCancelled,
|
||||
socketModelInstallComplete,
|
||||
socketModelInstallDownloadProgress,
|
||||
socketModelInstallDownloadsComplete,
|
||||
socketModelInstallDownloadStarted,
|
||||
socketModelInstallError,
|
||||
socketModelInstallStarted,
|
||||
} from 'services/events/actions';
|
||||
|
||||
/**
|
||||
* A model install has two main stages - downloading and installing. All these events are namespaced under `model_install_`
|
||||
* which is a bit misleading. For example, a `model_install_started` event is actually fired _after_ the model has fully
|
||||
* downloaded and is being "physically" installed.
|
||||
*
|
||||
* Note: the download events are only fired for remote model installs, not local.
|
||||
*
|
||||
* Here's the expected flow:
|
||||
* - API receives install request, model manager preps the install
|
||||
* - `model_install_download_started` fired when the download starts
|
||||
* - `model_install_download_progress` fired continually until the download is complete
|
||||
* - `model_install_download_complete` fired when the download is complete
|
||||
* - `model_install_started` fired when the "physical" installation starts
|
||||
* - `model_install_complete` fired when the installation is complete
|
||||
* - `model_install_cancelled` fired if the installation is cancelled
|
||||
* - `model_install_error` fired if the installation has an error
|
||||
*/
|
||||
|
||||
const selectModelInstalls = modelsApi.endpoints.listModelInstalls.select();
|
||||
|
||||
export const addModelInstallEventListener = (startAppListening: AppStartListening) => {
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallDownloadProgress,
|
||||
effect: async (action, { dispatch }) => {
|
||||
const { bytes, total_bytes, id } = action.payload.data;
|
||||
actionCreator: socketModelInstallDownloadStarted,
|
||||
effect: async (action, { dispatch, getState }) => {
|
||||
const { id } = action.payload.data;
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.bytes = bytes;
|
||||
modelImport.total_bytes = total_bytes;
|
||||
modelImport.status = 'downloading';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'downloading';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallStarted,
|
||||
effect: async (action, { dispatch, getState }) => {
|
||||
const { id } = action.payload.data;
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'running';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallDownloadProgress,
|
||||
effect: async (action, { dispatch, getState }) => {
|
||||
const { bytes, total_bytes, id } = action.payload.data;
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.bytes = bytes;
|
||||
modelImport.total_bytes = total_bytes;
|
||||
modelImport.status = 'downloading';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallComplete,
|
||||
effect: (action, { dispatch }) => {
|
||||
effect: (action, { dispatch, getState }) => {
|
||||
const { id } = action.payload.data;
|
||||
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'completed';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'completed';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelConfig', id: LIST_TAG }]));
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelScanFolderResults', id: LIST_TAG }]));
|
||||
},
|
||||
@@ -49,37 +128,69 @@ export const addModelInstallEventListener = (startAppListening: AppStartListenin
|
||||
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallError,
|
||||
effect: (action, { dispatch }) => {
|
||||
effect: (action, { dispatch, getState }) => {
|
||||
const { id, error, error_type } = action.payload.data;
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'error';
|
||||
modelImport.error_reason = error_type;
|
||||
modelImport.error = error;
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'error';
|
||||
modelImport.error_reason = error_type;
|
||||
modelImport.error = error;
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallCancelled,
|
||||
effect: (action, { dispatch }) => {
|
||||
effect: (action, { dispatch, getState }) => {
|
||||
const { id } = action.payload.data;
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'cancelled';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'cancelled';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
startAppListening({
|
||||
actionCreator: socketModelInstallDownloadsComplete,
|
||||
effect: (action, { dispatch, getState }) => {
|
||||
const { id } = action.payload.data;
|
||||
const { data } = selectModelInstalls(getState());
|
||||
|
||||
if (!data || !data.find((m) => m.id === id)) {
|
||||
dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
|
||||
} else {
|
||||
dispatch(
|
||||
modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
|
||||
const modelImport = draft.find((m) => m.id === id);
|
||||
if (modelImport) {
|
||||
modelImport.status = 'downloads_done';
|
||||
}
|
||||
return draft;
|
||||
})
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -123,6 +123,13 @@ export type paths = {
|
||||
*/
|
||||
delete: operations["prune_model_install_jobs"];
|
||||
};
|
||||
"/api/v2/models/install/huggingface": {
|
||||
/**
|
||||
* Install Hugging Face Model
|
||||
* @description Install a Hugging Face model using a string identifier.
|
||||
*/
|
||||
get: operations["install_hugging_face_model"];
|
||||
};
|
||||
"/api/v2/models/install/{id}": {
|
||||
/**
|
||||
* Get Model Install Job
|
||||
@@ -3788,23 +3795,6 @@ export type components = {
|
||||
* @description Class to monitor and control a model download request.
|
||||
*/
|
||||
DownloadJob: {
|
||||
/**
|
||||
* Source
|
||||
* Format: uri
|
||||
* @description Where to download from. Specific types specified in child classes.
|
||||
*/
|
||||
source: string;
|
||||
/**
|
||||
* Dest
|
||||
* Format: path
|
||||
* @description Destination of downloaded model on local disk; a directory or file path
|
||||
*/
|
||||
dest: string;
|
||||
/**
|
||||
* Access Token
|
||||
* @description authorization token for protected resources
|
||||
*/
|
||||
access_token?: string | null;
|
||||
/**
|
||||
* Id
|
||||
* @description Numeric ID of this job
|
||||
@@ -3812,36 +3802,21 @@ export type components = {
|
||||
*/
|
||||
id?: number;
|
||||
/**
|
||||
* Priority
|
||||
* @description Queue priority; lower values are higher priority
|
||||
* @default 10
|
||||
* Dest
|
||||
* Format: path
|
||||
* @description Initial destination of downloaded model on local disk; a directory or file path
|
||||
*/
|
||||
priority?: number;
|
||||
dest: string;
|
||||
/**
|
||||
* Download Path
|
||||
* @description Final location of downloaded file or directory
|
||||
*/
|
||||
download_path?: string | null;
|
||||
/**
|
||||
* @description Status of the download
|
||||
* @default waiting
|
||||
*/
|
||||
status?: components["schemas"]["DownloadJobStatus"];
|
||||
/**
|
||||
* Download Path
|
||||
* @description Final location of downloaded file
|
||||
*/
|
||||
download_path?: string | null;
|
||||
/**
|
||||
* Job Started
|
||||
* @description Timestamp for when the download job started
|
||||
*/
|
||||
job_started?: string | null;
|
||||
/**
|
||||
* Job Ended
|
||||
* @description Timestamp for when the download job ende1d (completed or errored)
|
||||
*/
|
||||
job_ended?: string | null;
|
||||
/**
|
||||
* Content Type
|
||||
* @description Content type of downloaded file
|
||||
*/
|
||||
content_type?: string | null;
|
||||
/**
|
||||
* Bytes
|
||||
* @description Bytes downloaded so far
|
||||
@@ -3864,6 +3839,38 @@ export type components = {
|
||||
* @description Traceback of the exception that caused an error
|
||||
*/
|
||||
error?: string | null;
|
||||
/**
|
||||
* Source
|
||||
* Format: uri
|
||||
* @description Where to download from. Specific types specified in child classes.
|
||||
*/
|
||||
source: string;
|
||||
/**
|
||||
* Access Token
|
||||
* @description authorization token for protected resources
|
||||
*/
|
||||
access_token?: string | null;
|
||||
/**
|
||||
* Priority
|
||||
* @description Queue priority; lower values are higher priority
|
||||
* @default 10
|
||||
*/
|
||||
priority?: number;
|
||||
/**
|
||||
* Job Started
|
||||
* @description Timestamp for when the download job started
|
||||
*/
|
||||
job_started?: string | null;
|
||||
/**
|
||||
* Job Ended
|
||||
* @description Timestamp for when the download job ende1d (completed or errored)
|
||||
*/
|
||||
job_ended?: string | null;
|
||||
/**
|
||||
* Content Type
|
||||
* @description Content type of downloaded file
|
||||
*/
|
||||
content_type?: string | null;
|
||||
};
|
||||
/**
|
||||
* DownloadJobStatus
|
||||
@@ -7276,144 +7283,144 @@ export type components = {
|
||||
project_id: string | null;
|
||||
};
|
||||
InvocationOutputMap: {
|
||||
pidi_image_processor: components["schemas"]["ImageOutput"];
|
||||
image_mask_to_tensor: components["schemas"]["MaskOutput"];
|
||||
vae_loader: components["schemas"]["VAEOutput"];
|
||||
collect: components["schemas"]["CollectInvocationOutput"];
|
||||
string_join_three: components["schemas"]["StringOutput"];
|
||||
content_shuffle_image_processor: components["schemas"]["ImageOutput"];
|
||||
random_range: components["schemas"]["IntegerCollectionOutput"];
|
||||
ip_adapter: components["schemas"]["IPAdapterOutput"];
|
||||
step_param_easing: components["schemas"]["FloatCollectionOutput"];
|
||||
core_metadata: components["schemas"]["MetadataOutput"];
|
||||
main_model_loader: components["schemas"]["ModelLoaderOutput"];
|
||||
leres_image_processor: components["schemas"]["ImageOutput"];
|
||||
calculate_image_tiles_even_split: components["schemas"]["CalculateImageTilesOutput"];
|
||||
color_correct: components["schemas"]["ImageOutput"];
|
||||
calculate_image_tiles: components["schemas"]["CalculateImageTilesOutput"];
|
||||
float_range: components["schemas"]["FloatCollectionOutput"];
|
||||
infill_cv2: components["schemas"]["ImageOutput"];
|
||||
img_channel_multiply: components["schemas"]["ImageOutput"];
|
||||
img_pad_crop: components["schemas"]["ImageOutput"];
|
||||
sdxl_refiner_compel_prompt: components["schemas"]["ConditioningOutput"];
|
||||
face_mask_detection: components["schemas"]["FaceMaskOutput"];
|
||||
infill_lama: components["schemas"]["ImageOutput"];
|
||||
mask_combine: components["schemas"]["ImageOutput"];
|
||||
sdxl_compel_prompt: components["schemas"]["ConditioningOutput"];
|
||||
segment_anything_processor: components["schemas"]["ImageOutput"];
|
||||
merge_metadata: components["schemas"]["MetadataOutput"];
|
||||
img_ilerp: components["schemas"]["ImageOutput"];
|
||||
heuristic_resize: components["schemas"]["ImageOutput"];
|
||||
cv_inpaint: components["schemas"]["ImageOutput"];
|
||||
div: components["schemas"]["IntegerOutput"];
|
||||
pair_tile_image: components["schemas"]["PairTileImageOutput"];
|
||||
float_math: components["schemas"]["FloatOutput"];
|
||||
img_channel_offset: components["schemas"]["ImageOutput"];
|
||||
canvas_paste_back: components["schemas"]["ImageOutput"];
|
||||
canny_image_processor: components["schemas"]["ImageOutput"];
|
||||
integer_collection: components["schemas"]["IntegerCollectionOutput"];
|
||||
freeu: components["schemas"]["UNetOutput"];
|
||||
lresize: components["schemas"]["LatentsOutput"];
|
||||
range_of_size: components["schemas"]["IntegerCollectionOutput"];
|
||||
depth_anything_image_processor: components["schemas"]["ImageOutput"];
|
||||
float_to_int: components["schemas"]["IntegerOutput"];
|
||||
rand_int: components["schemas"]["IntegerOutput"];
|
||||
lineart_anime_image_processor: components["schemas"]["ImageOutput"];
|
||||
string_split: components["schemas"]["String2Output"];
|
||||
img_nsfw: components["schemas"]["ImageOutput"];
|
||||
string: components["schemas"]["StringOutput"];
|
||||
mask_edge: components["schemas"]["ImageOutput"];
|
||||
i2l: components["schemas"]["LatentsOutput"];
|
||||
face_identifier: components["schemas"]["ImageOutput"];
|
||||
compel: components["schemas"]["ConditioningOutput"];
|
||||
esrgan: components["schemas"]["ImageOutput"];
|
||||
seamless: components["schemas"]["SeamlessModeOutput"];
|
||||
mask_from_id: components["schemas"]["ImageOutput"];
|
||||
invert_tensor_mask: components["schemas"]["MaskOutput"];
|
||||
rectangle_mask: components["schemas"]["MaskOutput"];
|
||||
conditioning: components["schemas"]["ConditioningOutput"];
|
||||
t2i_adapter: components["schemas"]["T2IAdapterOutput"];
|
||||
string_collection: components["schemas"]["StringCollectionOutput"];
|
||||
show_image: components["schemas"]["ImageOutput"];
|
||||
dw_openpose_image_processor: components["schemas"]["ImageOutput"];
|
||||
string_split_neg: components["schemas"]["StringPosNegOutput"];
|
||||
conditioning_collection: components["schemas"]["ConditioningCollectionOutput"];
|
||||
infill_patchmatch: components["schemas"]["ImageOutput"];
|
||||
img_conv: components["schemas"]["ImageOutput"];
|
||||
unsharp_mask: components["schemas"]["ImageOutput"];
|
||||
metadata_item: components["schemas"]["MetadataItemOutput"];
|
||||
image: components["schemas"]["ImageOutput"];
|
||||
image_collection: components["schemas"]["ImageCollectionOutput"];
|
||||
tile_to_properties: components["schemas"]["TileToPropertiesOutput"];
|
||||
lblend: components["schemas"]["LatentsOutput"];
|
||||
float: components["schemas"]["FloatOutput"];
|
||||
boolean_collection: components["schemas"]["BooleanCollectionOutput"];
|
||||
color: components["schemas"]["ColorOutput"];
|
||||
midas_depth_image_processor: components["schemas"]["ImageOutput"];
|
||||
zoe_depth_image_processor: components["schemas"]["ImageOutput"];
|
||||
infill_rgba: components["schemas"]["ImageOutput"];
|
||||
mlsd_image_processor: components["schemas"]["ImageOutput"];
|
||||
lscale: components["schemas"]["LatentsOutput"];
|
||||
string_split: components["schemas"]["String2Output"];
|
||||
mask_edge: components["schemas"]["ImageOutput"];
|
||||
content_shuffle_image_processor: components["schemas"]["ImageOutput"];
|
||||
color_correct: components["schemas"]["ImageOutput"];
|
||||
save_image: components["schemas"]["ImageOutput"];
|
||||
show_image: components["schemas"]["ImageOutput"];
|
||||
segment_anything_processor: components["schemas"]["ImageOutput"];
|
||||
latents: components["schemas"]["LatentsOutput"];
|
||||
lineart_image_processor: components["schemas"]["ImageOutput"];
|
||||
hed_image_processor: components["schemas"]["ImageOutput"];
|
||||
infill_lama: components["schemas"]["ImageOutput"];
|
||||
infill_patchmatch: components["schemas"]["ImageOutput"];
|
||||
float_collection: components["schemas"]["FloatCollectionOutput"];
|
||||
denoise_latents: components["schemas"]["LatentsOutput"];
|
||||
metadata: components["schemas"]["MetadataOutput"];
|
||||
compel: components["schemas"]["ConditioningOutput"];
|
||||
img_blur: components["schemas"]["ImageOutput"];
|
||||
img_crop: components["schemas"]["ImageOutput"];
|
||||
sdxl_lora_collection_loader: components["schemas"]["SDXLLoRALoaderOutput"];
|
||||
img_ilerp: components["schemas"]["ImageOutput"];
|
||||
img_paste: components["schemas"]["ImageOutput"];
|
||||
core_metadata: components["schemas"]["MetadataOutput"];
|
||||
lora_collection_loader: components["schemas"]["LoRALoaderOutput"];
|
||||
lora_selector: components["schemas"]["LoRASelectorOutput"];
|
||||
create_denoise_mask: components["schemas"]["DenoiseMaskOutput"];
|
||||
rectangle_mask: components["schemas"]["MaskOutput"];
|
||||
noise: components["schemas"]["NoiseOutput"];
|
||||
float_to_int: components["schemas"]["IntegerOutput"];
|
||||
esrgan: components["schemas"]["ImageOutput"];
|
||||
merge_tiles_to_image: components["schemas"]["ImageOutput"];
|
||||
prompt_from_file: components["schemas"]["StringCollectionOutput"];
|
||||
boolean: components["schemas"]["BooleanOutput"];
|
||||
create_gradient_mask: components["schemas"]["GradientMaskOutput"];
|
||||
rand_float: components["schemas"]["FloatOutput"];
|
||||
img_mul: components["schemas"]["ImageOutput"];
|
||||
controlnet: components["schemas"]["ControlOutput"];
|
||||
latents_collection: components["schemas"]["LatentsCollectionOutput"];
|
||||
img_lerp: components["schemas"]["ImageOutput"];
|
||||
noise: components["schemas"]["NoiseOutput"];
|
||||
iterate: components["schemas"]["IterateInvocationOutput"];
|
||||
lineart_image_processor: components["schemas"]["ImageOutput"];
|
||||
tomask: components["schemas"]["ImageOutput"];
|
||||
integer: components["schemas"]["IntegerOutput"];
|
||||
create_denoise_mask: components["schemas"]["DenoiseMaskOutput"];
|
||||
clip_skip: components["schemas"]["CLIPSkipInvocationOutput"];
|
||||
denoise_latents: components["schemas"]["LatentsOutput"];
|
||||
string_join: components["schemas"]["StringOutput"];
|
||||
scheduler: components["schemas"]["SchedulerOutput"];
|
||||
model_identifier: components["schemas"]["ModelIdentifierOutput"];
|
||||
normalbae_image_processor: components["schemas"]["ImageOutput"];
|
||||
face_off: components["schemas"]["FaceOffOutput"];
|
||||
hed_image_processor: components["schemas"]["ImageOutput"];
|
||||
img_paste: components["schemas"]["ImageOutput"];
|
||||
img_chan: components["schemas"]["ImageOutput"];
|
||||
img_watermark: components["schemas"]["ImageOutput"];
|
||||
l2i: components["schemas"]["ImageOutput"];
|
||||
string_replace: components["schemas"]["StringOutput"];
|
||||
color_map_image_processor: components["schemas"]["ImageOutput"];
|
||||
tile_image_processor: components["schemas"]["ImageOutput"];
|
||||
crop_latents: components["schemas"]["LatentsOutput"];
|
||||
sdxl_lora_collection_loader: components["schemas"]["SDXLLoRALoaderOutput"];
|
||||
add: components["schemas"]["IntegerOutput"];
|
||||
sub: components["schemas"]["IntegerOutput"];
|
||||
img_scale: components["schemas"]["ImageOutput"];
|
||||
range: components["schemas"]["IntegerCollectionOutput"];
|
||||
dynamic_prompt: components["schemas"]["StringCollectionOutput"];
|
||||
img_crop: components["schemas"]["ImageOutput"];
|
||||
infill_tile: components["schemas"]["ImageOutput"];
|
||||
img_resize: components["schemas"]["ImageOutput"];
|
||||
mediapipe_face_processor: components["schemas"]["ImageOutput"];
|
||||
sdxl_model_loader: components["schemas"]["SDXLModelLoaderOutput"];
|
||||
lora_selector: components["schemas"]["LoRASelectorOutput"];
|
||||
img_hue_adjust: components["schemas"]["ImageOutput"];
|
||||
latents: components["schemas"]["LatentsOutput"];
|
||||
lora_collection_loader: components["schemas"]["LoRALoaderOutput"];
|
||||
img_blur: components["schemas"]["ImageOutput"];
|
||||
ideal_size: components["schemas"]["IdealSizeOutput"];
|
||||
float_collection: components["schemas"]["FloatCollectionOutput"];
|
||||
blank_image: components["schemas"]["ImageOutput"];
|
||||
integer_math: components["schemas"]["IntegerOutput"];
|
||||
lora_loader: components["schemas"]["LoRALoaderOutput"];
|
||||
metadata: components["schemas"]["MetadataOutput"];
|
||||
infill_rgba: components["schemas"]["ImageOutput"];
|
||||
sdxl_lora_loader: components["schemas"]["SDXLLoRALoaderOutput"];
|
||||
round_float: components["schemas"]["FloatOutput"];
|
||||
sdxl_refiner_model_loader: components["schemas"]["SDXLRefinerModelLoaderOutput"];
|
||||
mul: components["schemas"]["IntegerOutput"];
|
||||
alpha_mask_to_tensor: components["schemas"]["MaskOutput"];
|
||||
lscale: components["schemas"]["LatentsOutput"];
|
||||
save_image: components["schemas"]["ImageOutput"];
|
||||
lora_loader: components["schemas"]["LoRALoaderOutput"];
|
||||
iterate: components["schemas"]["IterateInvocationOutput"];
|
||||
t2i_adapter: components["schemas"]["T2IAdapterOutput"];
|
||||
color_map_image_processor: components["schemas"]["ImageOutput"];
|
||||
blank_image: components["schemas"]["ImageOutput"];
|
||||
normalbae_image_processor: components["schemas"]["ImageOutput"];
|
||||
canvas_paste_back: components["schemas"]["ImageOutput"];
|
||||
string_split_neg: components["schemas"]["StringPosNegOutput"];
|
||||
img_channel_offset: components["schemas"]["ImageOutput"];
|
||||
face_mask_detection: components["schemas"]["FaceMaskOutput"];
|
||||
cv_inpaint: components["schemas"]["ImageOutput"];
|
||||
clip_skip: components["schemas"]["CLIPSkipInvocationOutput"];
|
||||
invert_tensor_mask: components["schemas"]["MaskOutput"];
|
||||
tomask: components["schemas"]["ImageOutput"];
|
||||
main_model_loader: components["schemas"]["ModelLoaderOutput"];
|
||||
img_watermark: components["schemas"]["ImageOutput"];
|
||||
img_pad_crop: components["schemas"]["ImageOutput"];
|
||||
random_range: components["schemas"]["IntegerCollectionOutput"];
|
||||
mlsd_image_processor: components["schemas"]["ImageOutput"];
|
||||
merge_metadata: components["schemas"]["MetadataOutput"];
|
||||
string_join: components["schemas"]["StringOutput"];
|
||||
vae_loader: components["schemas"]["VAEOutput"];
|
||||
calculate_image_tiles_even_split: components["schemas"]["CalculateImageTilesOutput"];
|
||||
calculate_image_tiles_min_overlap: components["schemas"]["CalculateImageTilesOutput"];
|
||||
mask_from_id: components["schemas"]["ImageOutput"];
|
||||
zoe_depth_image_processor: components["schemas"]["ImageOutput"];
|
||||
img_resize: components["schemas"]["ImageOutput"];
|
||||
string_replace: components["schemas"]["StringOutput"];
|
||||
face_identifier: components["schemas"]["ImageOutput"];
|
||||
canny_image_processor: components["schemas"]["ImageOutput"];
|
||||
collect: components["schemas"]["CollectInvocationOutput"];
|
||||
infill_tile: components["schemas"]["ImageOutput"];
|
||||
integer_collection: components["schemas"]["IntegerCollectionOutput"];
|
||||
img_lerp: components["schemas"]["ImageOutput"];
|
||||
step_param_easing: components["schemas"]["FloatCollectionOutput"];
|
||||
lresize: components["schemas"]["LatentsOutput"];
|
||||
img_mul: components["schemas"]["ImageOutput"];
|
||||
create_gradient_mask: components["schemas"]["GradientMaskOutput"];
|
||||
img_scale: components["schemas"]["ImageOutput"];
|
||||
rand_float: components["schemas"]["FloatOutput"];
|
||||
tile_to_properties: components["schemas"]["TileToPropertiesOutput"];
|
||||
calculate_image_tiles: components["schemas"]["CalculateImageTilesOutput"];
|
||||
range_of_size: components["schemas"]["IntegerCollectionOutput"];
|
||||
sdxl_refiner_model_loader: components["schemas"]["SDXLRefinerModelLoaderOutput"];
|
||||
heuristic_resize: components["schemas"]["ImageOutput"];
|
||||
controlnet: components["schemas"]["ControlOutput"];
|
||||
string: components["schemas"]["StringOutput"];
|
||||
tile_image_processor: components["schemas"]["ImageOutput"];
|
||||
metadata_item: components["schemas"]["MetadataItemOutput"];
|
||||
freeu: components["schemas"]["UNetOutput"];
|
||||
round_float: components["schemas"]["FloatOutput"];
|
||||
conditioning: components["schemas"]["ConditioningOutput"];
|
||||
ideal_size: components["schemas"]["IdealSizeOutput"];
|
||||
float: components["schemas"]["FloatOutput"];
|
||||
conditioning_collection: components["schemas"]["ConditioningCollectionOutput"];
|
||||
alpha_mask_to_tensor: components["schemas"]["MaskOutput"];
|
||||
integer_math: components["schemas"]["IntegerOutput"];
|
||||
string_collection: components["schemas"]["StringCollectionOutput"];
|
||||
img_conv: components["schemas"]["ImageOutput"];
|
||||
img_channel_multiply: components["schemas"]["ImageOutput"];
|
||||
lblend: components["schemas"]["LatentsOutput"];
|
||||
color: components["schemas"]["ColorOutput"];
|
||||
image: components["schemas"]["ImageOutput"];
|
||||
sdxl_model_loader: components["schemas"]["SDXLModelLoaderOutput"];
|
||||
image_collection: components["schemas"]["ImageCollectionOutput"];
|
||||
model_identifier: components["schemas"]["ModelIdentifierOutput"];
|
||||
l2i: components["schemas"]["ImageOutput"];
|
||||
seamless: components["schemas"]["SeamlessModeOutput"];
|
||||
boolean_collection: components["schemas"]["BooleanCollectionOutput"];
|
||||
string_join_three: components["schemas"]["StringOutput"];
|
||||
ip_adapter: components["schemas"]["IPAdapterOutput"];
|
||||
add: components["schemas"]["IntegerOutput"];
|
||||
crop_latents: components["schemas"]["LatentsOutput"];
|
||||
float_range: components["schemas"]["FloatCollectionOutput"];
|
||||
mul: components["schemas"]["IntegerOutput"];
|
||||
dw_openpose_image_processor: components["schemas"]["ImageOutput"];
|
||||
boolean: components["schemas"]["BooleanOutput"];
|
||||
dynamic_prompt: components["schemas"]["StringCollectionOutput"];
|
||||
mediapipe_face_processor: components["schemas"]["ImageOutput"];
|
||||
i2l: components["schemas"]["LatentsOutput"];
|
||||
latents_collection: components["schemas"]["LatentsCollectionOutput"];
|
||||
integer: components["schemas"]["IntegerOutput"];
|
||||
img_chan: components["schemas"]["ImageOutput"];
|
||||
pair_tile_image: components["schemas"]["PairTileImageOutput"];
|
||||
unsharp_mask: components["schemas"]["ImageOutput"];
|
||||
img_hue_adjust: components["schemas"]["ImageOutput"];
|
||||
lineart_anime_image_processor: components["schemas"]["ImageOutput"];
|
||||
face_off: components["schemas"]["FaceOffOutput"];
|
||||
mask_combine: components["schemas"]["ImageOutput"];
|
||||
leres_image_processor: components["schemas"]["ImageOutput"];
|
||||
image_mask_to_tensor: components["schemas"]["MaskOutput"];
|
||||
sdxl_refiner_compel_prompt: components["schemas"]["ConditioningOutput"];
|
||||
scheduler: components["schemas"]["SchedulerOutput"];
|
||||
sub: components["schemas"]["IntegerOutput"];
|
||||
pidi_image_processor: components["schemas"]["ImageOutput"];
|
||||
infill_cv2: components["schemas"]["ImageOutput"];
|
||||
div: components["schemas"]["IntegerOutput"];
|
||||
img_nsfw: components["schemas"]["ImageOutput"];
|
||||
depth_anything_image_processor: components["schemas"]["ImageOutput"];
|
||||
sdxl_compel_prompt: components["schemas"]["ConditioningOutput"];
|
||||
range: components["schemas"]["IntegerCollectionOutput"];
|
||||
rand_int: components["schemas"]["IntegerOutput"];
|
||||
float_math: components["schemas"]["FloatOutput"];
|
||||
};
|
||||
/**
|
||||
* InvocationStartedEvent
|
||||
@@ -9443,6 +9450,49 @@ export type components = {
|
||||
[key: string]: number | string;
|
||||
})[];
|
||||
};
|
||||
/**
|
||||
* ModelInstallDownloadStartedEvent
|
||||
* @description Event model for model_install_download_started
|
||||
*/
|
||||
ModelInstallDownloadStartedEvent: {
|
||||
/**
|
||||
* Timestamp
|
||||
* @description The timestamp of the event
|
||||
*/
|
||||
timestamp: number;
|
||||
/**
|
||||
* Id
|
||||
* @description The ID of the install job
|
||||
*/
|
||||
id: number;
|
||||
/**
|
||||
* Source
|
||||
* @description Source of the model; local path, repo_id or url
|
||||
*/
|
||||
source: string;
|
||||
/**
|
||||
* Local Path
|
||||
* @description Where model is downloading to
|
||||
*/
|
||||
local_path: string;
|
||||
/**
|
||||
* Bytes
|
||||
* @description Number of bytes downloaded so far
|
||||
*/
|
||||
bytes: number;
|
||||
/**
|
||||
* Total Bytes
|
||||
* @description Total size of download, including all files
|
||||
*/
|
||||
total_bytes: number;
|
||||
/**
|
||||
* Parts
|
||||
* @description Progress of downloading URLs that comprise the model, if any
|
||||
*/
|
||||
parts: ({
|
||||
[key: string]: number | string;
|
||||
})[];
|
||||
};
|
||||
/**
|
||||
* ModelInstallDownloadsCompleteEvent
|
||||
* @description Emitted once when an install job becomes active.
|
||||
@@ -10671,8 +10721,9 @@ export type components = {
|
||||
/**
|
||||
* Size
|
||||
* @description The size of this file, in bytes
|
||||
* @default 0
|
||||
*/
|
||||
size: number;
|
||||
size?: number | null;
|
||||
/**
|
||||
* Sha256
|
||||
* @description SHA256 hash of this model (not always available)
|
||||
@@ -14050,6 +14101,40 @@ export type operations = {
|
||||
};
|
||||
};
|
||||
};
|
||||
/**
|
||||
* Install Hugging Face Model
|
||||
* @description Install a Hugging Face model using a string identifier.
|
||||
*/
|
||||
install_hugging_face_model: {
|
||||
parameters: {
|
||||
query: {
|
||||
/** @description Hugging Face repo_id to install */
|
||||
source: string;
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description The model is being installed */
|
||||
201: {
|
||||
content: {
|
||||
"text/html": string;
|
||||
};
|
||||
};
|
||||
/** @description Bad request */
|
||||
400: {
|
||||
content: never;
|
||||
};
|
||||
/** @description There is already a model corresponding to this path or repo_id */
|
||||
409: {
|
||||
content: never;
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
/**
|
||||
* Get Model Install Job
|
||||
* @description Return model install job corresponding to the given source. See the documentation for 'List Model Install Jobs'
|
||||
|
||||
@@ -16,6 +16,7 @@ import type {
|
||||
ModelInstallCompleteEvent,
|
||||
ModelInstallDownloadProgressEvent,
|
||||
ModelInstallDownloadsCompleteEvent,
|
||||
ModelInstallDownloadStartedEvent,
|
||||
ModelInstallErrorEvent,
|
||||
ModelInstallStartedEvent,
|
||||
ModelLoadCompleteEvent,
|
||||
@@ -45,6 +46,9 @@ export const socketModelInstallStarted = createSocketAction<ModelInstallStartedE
|
||||
export const socketModelInstallDownloadProgress = createSocketAction<ModelInstallDownloadProgressEvent>(
|
||||
'ModelInstallDownloadProgressEvent'
|
||||
);
|
||||
export const socketModelInstallDownloadStarted = createSocketAction<ModelInstallDownloadStartedEvent>(
|
||||
'ModelInstallDownloadStartedEvent'
|
||||
);
|
||||
export const socketModelInstallDownloadsComplete = createSocketAction<ModelInstallDownloadsCompleteEvent>(
|
||||
'ModelInstallDownloadsCompleteEvent'
|
||||
);
|
||||
|
||||
@@ -9,6 +9,7 @@ export type InvocationCompleteEvent = S['InvocationCompleteEvent'];
|
||||
export type InvocationErrorEvent = S['InvocationErrorEvent'];
|
||||
export type ProgressImage = InvocationDenoiseProgressEvent['progress_image'];
|
||||
|
||||
export type ModelInstallDownloadStartedEvent = S['ModelInstallDownloadStartedEvent'];
|
||||
export type ModelInstallDownloadProgressEvent = S['ModelInstallDownloadProgressEvent'];
|
||||
export type ModelInstallDownloadsCompleteEvent = S['ModelInstallDownloadsCompleteEvent'];
|
||||
export type ModelInstallCompleteEvent = S['ModelInstallCompleteEvent'];
|
||||
@@ -49,6 +50,7 @@ export type ServerToClientEvents = {
|
||||
download_error: (payload: DownloadErrorEvent) => void;
|
||||
model_load_started: (payload: ModelLoadStartedEvent) => void;
|
||||
model_install_started: (payload: ModelInstallStartedEvent) => void;
|
||||
model_install_download_started: (payload: ModelInstallDownloadStartedEvent) => void;
|
||||
model_install_download_progress: (payload: ModelInstallDownloadProgressEvent) => void;
|
||||
model_install_downloads_complete: (payload: ModelInstallDownloadsCompleteEvent) => void;
|
||||
model_install_complete: (payload: ModelInstallCompleteEvent) => void;
|
||||
|
||||
@@ -31,7 +31,6 @@ from invokeai.app.invocations.fields import (
|
||||
WithMetadata,
|
||||
WithWorkflow,
|
||||
)
|
||||
from invokeai.app.invocations.latent import SchedulerOutput
|
||||
from invokeai.app.invocations.metadata import MetadataItemField, MetadataItemOutput, MetadataOutput
|
||||
from invokeai.app.invocations.model import (
|
||||
CLIPField,
|
||||
@@ -64,6 +63,7 @@ from invokeai.app.invocations.primitives import (
|
||||
StringCollectionOutput,
|
||||
StringOutput,
|
||||
)
|
||||
from invokeai.app.invocations.scheduler import SchedulerOutput
|
||||
from invokeai.app.services.boards.boards_common import BoardDTO
|
||||
from invokeai.app.services.config.config_default import InvokeAIAppConfig
|
||||
from invokeai.app.services.image_records.image_records_common import ImageCategory
|
||||
@@ -108,7 +108,7 @@ __all__ = [
|
||||
"WithBoard",
|
||||
"WithMetadata",
|
||||
"WithWorkflow",
|
||||
# invokeai.app.invocations.latent
|
||||
# invokeai.app.invocations.scheduler
|
||||
"SchedulerOutput",
|
||||
# invokeai.app.invocations.metadata
|
||||
"MetadataItemField",
|
||||
|
||||
@@ -224,7 +224,7 @@ follow_imports = "skip" # skips type checking of the modules listed below
|
||||
module = [
|
||||
"invokeai.app.api.routers.models",
|
||||
"invokeai.app.invocations.compel",
|
||||
"invokeai.app.invocations.latent",
|
||||
"invokeai.app.invocations.denoise_latents",
|
||||
"invokeai.app.services.invocation_stats.invocation_stats_default",
|
||||
"invokeai.app.services.model_manager.model_manager_base",
|
||||
"invokeai.app.services.model_manager.model_manager_default",
|
||||
|
||||
@@ -17,6 +17,7 @@ from invokeai.app.services.events.events_common import (
|
||||
ModelInstallCompleteEvent,
|
||||
ModelInstallDownloadProgressEvent,
|
||||
ModelInstallDownloadsCompleteEvent,
|
||||
ModelInstallDownloadStartedEvent,
|
||||
ModelInstallStartedEvent,
|
||||
)
|
||||
from invokeai.app.services.model_install import (
|
||||
@@ -252,7 +253,7 @@ def test_simple_download(mm2_installer: ModelInstallServiceBase, mm2_app_config:
|
||||
assert (mm2_app_config.models_path / model_record.path).exists()
|
||||
|
||||
assert len(bus.events) == 5
|
||||
assert isinstance(bus.events[0], ModelInstallDownloadProgressEvent) # download starts
|
||||
assert isinstance(bus.events[0], ModelInstallDownloadStartedEvent) # download starts
|
||||
assert isinstance(bus.events[1], ModelInstallDownloadProgressEvent) # download progresses
|
||||
assert isinstance(bus.events[2], ModelInstallDownloadsCompleteEvent) # download completed
|
||||
assert isinstance(bus.events[3], ModelInstallStartedEvent) # install started
|
||||
|
||||
Reference in New Issue
Block a user