WIP - Started working towards MultiDiffusion batching.

Delete rough notes.
Fix advanced scheduler behaviour in MultiDiffusionPipeline.
2026-01-25 05:57:59 -05:00 · 2024-06-18 15:44:39 -04:00 · 2024-06-18 15:36:36 -04:00 · 2024-06-18 15:36:36 -04:00 · 2024-06-18 15:36:36 -04:00 · 2024-06-18 15:36:34 -04:00
41 changed files with 3498 additions and 1999 deletions
--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -9,7 +9,7 @@ from copy import deepcopy
 from typing import Any, Dict, List, Optional, Type

 from fastapi import Body, Path, Query, Response, UploadFile
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse, HTMLResponse
 from fastapi.routing import APIRouter
 from PIL import Image
 from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field
@@ -502,6 +502,133 @@ async def install_model(
    return result


+@model_manager_router.get(
+    "/install/huggingface",
+    operation_id="install_hugging_face_model",
+    responses={
+        201: {"description": "The model is being installed"},
+        400: {"description": "Bad request"},
+        409: {"description": "There is already a model corresponding to this path or repo_id"},
+    },
+    status_code=201,
+    response_class=HTMLResponse,
+)
+async def install_hugging_face_model(
+    source: str = Query(description="HuggingFace repo_id to install"),
+) -> HTMLResponse:
+    """Install a HuggingFace model by repo ID, returning a small HTML status page for the browser."""
+    import html  # local import: used to escape the user-supplied repo ID before it is echoed into the page
+
+    def generate_html(title: str, heading: str, repo_id: str, is_error: bool, message: str | None = "") -> str:
+        if message:
+            message = f"<p>{message}</p>"
+        title_class = "error" if is_error else "success"
+        return f"""
+            <html>
+
+            <head>
+                <title>{title}</title>
+                <style>
+                    body {{
+                        text-align: center;
+                        background-color: hsl(220 12% 10% / 1);
+                        font-family: Helvetica, sans-serif;
+                        color: hsl(220 12% 86% / 1);
+                    }}
+
+                    .repo-id {{
+                        color: hsl(220 12% 68% / 1);
+                    }}
+
+                    .error {{
+                        color: hsl(0 42% 68% / 1)
+                    }}
+
+                    .message-box {{
+                        display: inline-block;
+                        border-radius: 5px;
+                        background-color: hsl(220 12% 20% / 1);
+                        padding: 20px;
+                        padding-inline-start: 30px;
+                        padding-inline-end: 30px;
+                    }}
+
+                    .container {{
+                        display: flex;
+                        width: 100%;
+                        height: 100%;
+                        align-items: center;
+                        justify-content: center;
+                    }}
+
+                    a {{
+                        color: inherit
+                    }}
+
+                    a:visited {{
+                        color: inherit
+                    }}
+
+                    a:active {{
+                        color: inherit
+                    }}
+                </style>
+            </head>
+
+            <body style="background-color: hsl(220 12% 10% / 1);">
+                <div class="container">
+                    <div class="message-box">
+                        <h2 class="{title_class}">{heading}</h2>
+                        {message}
+                        <p class="repo-id">Repo ID: {html.escape(repo_id)}</p>
+                    </div>
+                </div>
+            </body>
+
+            </html>
+        """
+
+    try:
+        metadata = HuggingFaceMetadataFetch().from_id(source)
+        assert isinstance(metadata, ModelMetadataWithFiles)
+    except UnknownMetadataException:
+        title = "Unable to Install Model"
+        heading = "No HuggingFace repository found with that repo ID."
+        message = "Ensure the repo ID is correct and try again."
+        return HTMLResponse(content=generate_html(title, heading, source, True, message), status_code=400)
+
+    logger = ApiDependencies.invoker.services.logger
+
+    try:
+        installer = ApiDependencies.invoker.services.model_manager.install
+        if metadata.is_diffusers:
+            installer.heuristic_import(
+                source=source,
+                inplace=False,
+            )
+        elif metadata.ckpt_urls is not None and len(metadata.ckpt_urls) == 1:
+            installer.heuristic_import(
+                source=str(metadata.ckpt_urls[0]),
+                inplace=False,
+            )
+        else:
+            title = "Unable to Install Model"
+            heading = "This HuggingFace repo has multiple models."
+            message = "Please use the Model Manager to install this model."
+            return HTMLResponse(content=generate_html(title, heading, source, True, message), status_code=200)
+
+        title = "Model Install Started"
+        heading = "Your HuggingFace model is installing now."
+        message = "You can close this tab and check the Model Manager for installation progress."
+        return HTMLResponse(content=generate_html(title, heading, source, False, message), status_code=201)
+    except Exception as e:
+        logger.error(str(e))
+        title = "Unable to Install Model"
+        heading = "There was a problem installing this model."
+        message = 'Please use the Model Manager directly to install this model. If the issue persists, ask for help on <a href="https://discord.gg/ZmtBAhwWhy">discord</a>.'
+        return HTMLResponse(content=generate_html(title, heading, source, True, message), status_code=500)
+
+
@model_manager_router.get(
    "/install",
    operation_id="list_model_installs",
--- a/invokeai/app/invocations/blend_latents.py
+++ b/invokeai/app/invocations/blend_latents.py
@@ -0,0 +1,98 @@
+from typing import Any, Union
+
+import numpy as np
+import numpy.typing as npt
+import torch
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "lblend",
+    title="Blend Latents",
+    tags=["latents", "blend"],
+    category="latents",
+    version="1.0.3",
+)
+class BlendLatentsInvocation(BaseInvocation):
+    """Blend two latents using a given alpha. Latents must have same size."""
+
+    latents_a: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    latents_b: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    alpha: float = InputField(default=0.5, description=FieldDescriptions.blend_alpha)
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents_a = context.tensors.load(self.latents_a.latents_name)
+        latents_b = context.tensors.load(self.latents_b.latents_name)
+
+        if latents_a.shape != latents_b.shape:
+            raise Exception("Latents to blend must be the same size.")
+
+        device = TorchDevice.choose_torch_device()
+
+        def slerp(
+            t: Union[float, npt.NDArray[Any]],  # FIXME: maybe use np.float32 here?
+            v0: Union[torch.Tensor, npt.NDArray[Any]],
+            v1: Union[torch.Tensor, npt.NDArray[Any]],
+            DOT_THRESHOLD: float = 0.9995,
+        ) -> Union[torch.Tensor, npt.NDArray[Any]]:
+            """
+            Spherical linear interpolation
+            Args:
+                t (float/np.ndarray): Float value between 0.0 and 1.0
+                v0 (np.ndarray): Starting vector
+                v1 (np.ndarray): Final vector
+                DOT_THRESHOLD (float): Threshold for considering the two vectors as
+                                    collinear. Not recommended to alter this.
+            Returns:
+                v2 (np.ndarray): Interpolation vector between v0 and v1
+            """
+            inputs_are_torch = False
+            if not isinstance(v0, np.ndarray):
+                inputs_are_torch = True
+                v0 = v0.detach().cpu().numpy()
+            if not isinstance(v1, np.ndarray):
+                inputs_are_torch = True
+                v1 = v1.detach().cpu().numpy()
+
+            dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
+            if np.abs(dot) > DOT_THRESHOLD:
+                v2 = (1 - t) * v0 + t * v1
+            else:
+                theta_0 = np.arccos(dot)
+                sin_theta_0 = np.sin(theta_0)
+                theta_t = theta_0 * t
+                sin_theta_t = np.sin(theta_t)
+                s0 = np.sin(theta_0 - theta_t) / sin_theta_0
+                s1 = sin_theta_t / sin_theta_0
+                v2 = s0 * v0 + s1 * v1
+
+            if inputs_are_torch:
+                v2_torch: torch.Tensor = torch.from_numpy(v2).to(device)
+                return v2_torch
+            else:
+                assert isinstance(v2, np.ndarray)
+                return v2
+
+        # blend
+        bl = slerp(self.alpha, latents_a, latents_b)
+        assert isinstance(bl, torch.Tensor)
+        blended_latents: torch.Tensor = bl  # for type checking convenience
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        blended_latents = blended_latents.to("cpu")
+
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=blended_latents)
+        return LatentsOutput.build(latents_name=name, latents=blended_latents, seed=self.latents_a.seed)
--- a/invokeai/app/invocations/constants.py
+++ b/invokeai/app/invocations/constants.py
@@ -1,6 +1,7 @@
 from typing import Literal

 from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
+from invokeai.backend.util.devices import TorchDevice

 LATENT_SCALE_FACTOR = 8
 """
@@ -15,3 +16,5 @@ SCHEDULER_NAME_VALUES = Literal[tuple(SCHEDULER_MAP.keys())]

 IMAGE_MODES = Literal["L", "RGB", "RGBA", "CMYK", "YCbCr", "LAB", "HSV", "I", "F"]
 """A literal type for PIL image modes supported by Invoke"""
+
+DEFAULT_PRECISION = TorchDevice.choose_torch_dtype()
--- a/invokeai/app/invocations/create_denoise_mask.py
+++ b/invokeai/app/invocations/create_denoise_mask.py
@@ -0,0 +1,80 @@
+from typing import Optional
+
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from torchvision.transforms.functional import resize as tv_resize
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import DEFAULT_PRECISION
+from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField
+from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import DenoiseMaskOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+
+
+@invocation(
+    "create_denoise_mask",
+    title="Create Denoise Mask",
+    tags=["mask", "denoise"],
+    category="latents",
+    version="1.0.2",
+)
+class CreateDenoiseMaskInvocation(BaseInvocation):
+    """Creates mask for denoising model run."""
+
+    vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection, ui_order=0)
+    image: Optional[ImageField] = InputField(default=None, description="Image which will be masked", ui_order=1)
+    mask: ImageField = InputField(description="The mask to use when pasting", ui_order=2)
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=3)
+    fp32: bool = InputField(
+        default=DEFAULT_PRECISION == torch.float32,
+        description=FieldDescriptions.fp32,
+        ui_order=4,
+    )
+
+    def prep_mask_tensor(self, mask_image: Image.Image) -> torch.Tensor:
+        if mask_image.mode != "L":
+            mask_image = mask_image.convert("L")
+        mask_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
+        if mask_tensor.dim() == 3:
+            mask_tensor = mask_tensor.unsqueeze(0)
+        # if shape is not None:
+        #    mask_tensor = tv_resize(mask_tensor, shape, T.InterpolationMode.BILINEAR)
+        return mask_tensor
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> DenoiseMaskOutput:
+        if self.image is not None:
+            image = context.images.get_pil(self.image.image_name)
+            image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+            if image_tensor.dim() == 3:
+                image_tensor = image_tensor.unsqueeze(0)
+        else:
+            image_tensor = None
+
+        mask = self.prep_mask_tensor(
+            context.images.get_pil(self.mask.image_name),
+        )
+
+        if image_tensor is not None:
+            vae_info = context.models.load(self.vae.vae)
+
+            img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
+            masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
+            # TODO:
+            masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone())
+
+            masked_latents_name = context.tensors.save(tensor=masked_latents)
+        else:
+            masked_latents_name = None
+
+        mask_name = context.tensors.save(tensor=mask)
+
+        return DenoiseMaskOutput.build(
+            mask_name=mask_name,
+            masked_latents_name=masked_latents_name,
+            gradient=False,
+        )
--- a/invokeai/app/invocations/create_gradient_mask.py
+++ b/invokeai/app/invocations/create_gradient_mask.py
@@ -0,0 +1,138 @@
+from typing import Literal, Optional
+
+import numpy as np
+import torch
+import torchvision.transforms as T
+from PIL import Image, ImageFilter
+from torchvision.transforms.functional import resize as tv_resize
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.constants import DEFAULT_PRECISION
+from invokeai.app.invocations.fields import (
+    DenoiseMaskField,
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+    OutputField,
+)
+from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
+from invokeai.app.invocations.model import UNetField, VAEField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager import LoadedModel
+from invokeai.backend.model_manager.config import MainConfigBase, ModelVariantType
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+
+
+@invocation_output("gradient_mask_output")
+class GradientMaskOutput(BaseInvocationOutput):
+    """Outputs a denoise mask and an image representing the total gradient of the mask."""
+
+    denoise_mask: DenoiseMaskField = OutputField(description="Mask for denoise model run")
+    expanded_mask_area: ImageField = OutputField(
+        description="Image representing the total gradient area of the mask. For paste-back purposes."
+    )
+
+
+@invocation(
+    "create_gradient_mask",
+    title="Create Gradient Mask",
+    tags=["mask", "denoise"],
+    category="latents",
+    version="1.1.0",
+)
+class CreateGradientMaskInvocation(BaseInvocation):
+    """Creates mask for denoising model run."""
+
+    mask: ImageField = InputField(default=None, description="Image which will be masked", ui_order=1)
+    edge_radius: int = InputField(
+        default=16, ge=0, description="How far to blur/expand the edges of the mask", ui_order=2
+    )
+    coherence_mode: Literal["Gaussian Blur", "Box Blur", "Staged"] = InputField(default="Gaussian Blur", ui_order=3)
+    minimum_denoise: float = InputField(
+        default=0.0, ge=0, le=1, description="Minimum denoise level for the coherence region", ui_order=4
+    )
+    image: Optional[ImageField] = InputField(
+        default=None,
+        description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
+        title="[OPTIONAL] Image",
+        ui_order=6,
+    )
+    unet: Optional[UNetField] = InputField(
+        description="OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE",
+        default=None,
+        input=Input.Connection,
+        title="[OPTIONAL] UNet",
+        ui_order=5,
+    )
+    vae: Optional[VAEField] = InputField(
+        default=None,
+        description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
+        title="[OPTIONAL] VAE",
+        input=Input.Connection,
+        ui_order=7,
+    )
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=8)
+    fp32: bool = InputField(
+        default=DEFAULT_PRECISION == torch.float32,
+        description=FieldDescriptions.fp32,
+        ui_order=9,
+    )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> GradientMaskOutput:
+        mask_image = context.images.get_pil(self.mask.image_name, mode="L")
+        if self.edge_radius > 0:
+            if self.coherence_mode == "Box Blur":
+                blur_mask = mask_image.filter(ImageFilter.BoxBlur(self.edge_radius))
+            else:  # Gaussian Blur OR Staged
+                # Gaussian Blur uses standard deviation. 1/2 radius is a good approximation
+                blur_mask = mask_image.filter(ImageFilter.GaussianBlur(self.edge_radius / 2))
+
+            blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(blur_mask, normalize=False)
+
+            # redistribute blur so that the original edges are 0 and blur outwards to 1
+            blur_tensor = (blur_tensor - 0.5) * 2
+
+            threshold = 1 - self.minimum_denoise
+
+            if self.coherence_mode == "Staged":
+                # wherever the blur_tensor is less than fully masked, convert it to threshold
+                blur_tensor = torch.where((blur_tensor < 1) & (blur_tensor > 0), threshold, blur_tensor)
+            else:
+                # wherever the blur_tensor is above threshold but less than 1, drop it to threshold
+                blur_tensor = torch.where((blur_tensor > threshold) & (blur_tensor < 1), threshold, blur_tensor)
+
+        else:
+            blur_tensor: torch.Tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
+
+        mask_name = context.tensors.save(tensor=blur_tensor.unsqueeze(1))
+
+        # compute a [0, 1] mask from the blur_tensor
+        expanded_mask = torch.where((blur_tensor < 1), 0, 1)
+        expanded_mask_image = Image.fromarray((expanded_mask.squeeze(0).numpy() * 255).astype(np.uint8), mode="L")
+        expanded_image_dto = context.images.save(expanded_mask_image)
+
+        masked_latents_name = None
+        if self.unet is not None and self.vae is not None and self.image is not None:
+            # all three fields must be present at the same time
+            main_model_config = context.models.get_config(self.unet.unet.key)
+            assert isinstance(main_model_config, MainConfigBase)
+            if main_model_config.variant is ModelVariantType.Inpaint:
+                mask = blur_tensor
+                vae_info: LoadedModel = context.models.load(self.vae.vae)
+                image = context.images.get_pil(self.image.image_name)
+                image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+                if image_tensor.dim() == 3:
+                    image_tensor = image_tensor.unsqueeze(0)
+                img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
+                masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
+                masked_latents = ImageToLatentsInvocation.vae_encode(
+                    vae_info, self.fp32, self.tiled, masked_image.clone()
+                )
+                masked_latents_name = context.tensors.save(tensor=masked_latents)
+
+        return GradientMaskOutput(
+            denoise_mask=DenoiseMaskField(mask_name=mask_name, masked_latents_name=masked_latents_name, gradient=True),
+            expanded_mask_area=ImageField(image_name=expanded_image_dto.image_name),
+        )
--- a/invokeai/app/invocations/crop_latents.py
+++ b/invokeai/app/invocations/crop_latents.py
@@ -0,0 +1,61 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+
+
+# The Crop Latents node was copied from @skunkworxdark's implementation here:
+# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80
+@invocation(
+    "crop_latents",
+    title="Crop Latents",
+    tags=["latents", "crop"],
+    category="latents",
+    version="1.0.2",
+)
+# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`.
+# Currently, if the class names conflict then 'GET /openapi.json' fails.
+class CropLatentsCoreInvocation(BaseInvocation):
+    """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be
+    divisible by the latent scale factor of 8.
+    """
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    x: int = InputField(
+        ge=0,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+    y: int = InputField(
+        ge=0,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+    width: int = InputField(
+        ge=1,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+    height: int = InputField(
+        ge=1,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
+    )
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+
+        x1 = self.x // LATENT_SCALE_FACTOR
+        y1 = self.y // LATENT_SCALE_FACTOR
+        x2 = x1 + (self.width // LATENT_SCALE_FACTOR)
+        y2 = y1 + (self.height // LATENT_SCALE_FACTOR)
+
+        cropped_latents = latents[..., y1:y2, x1:x2]
+
+        name = context.tensors.save(tensor=cropped_latents)
+
+        return LatentsOutput.build(latents_name=name, latents=cropped_latents)
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -0,0 +1,848 @@
+# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
+import inspect
+from contextlib import ExitStack
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+
+import torch
+import torchvision
+import torchvision.transforms as T
+from diffusers.configuration_utils import ConfigMixin
+from diffusers.models.adapter import T2IAdapter
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
+from diffusers.schedulers.scheduling_tcd import TCDScheduler
+from diffusers.schedulers.scheduling_utils import SchedulerMixin as Scheduler
+from pydantic import field_validator
+from torchvision.transforms.functional import resize as tv_resize
+from transformers import CLIPVisionModelWithProjection
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
+from invokeai.app.invocations.controlnet_image_processors import ControlField
+from invokeai.app.invocations.fields import (
+    ConditioningField,
+    DenoiseMaskField,
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+    UIType,
+)
+from invokeai.app.invocations.ip_adapter import IPAdapterField
+from invokeai.app.invocations.model import ModelIdentifierField, UNetField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.invocations.t2i_adapter import T2IAdapterField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.app.util.controlnet_utils import prepare_control_image
+from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
+from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion.diffusers_pipeline import (
+    ControlNetData,
+    StableDiffusionGeneratorPipeline,
+    T2IAdapterData,
+)
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
+    BasicConditioningInfo,
+    IPAdapterConditioningInfo,
+    IPAdapterData,
+    Range,
+    SDXLConditioningInfo,
+    TextConditioningData,
+    TextConditioningRegions,
+)
+from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
+from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.hotfixes import ControlNetModel
+from invokeai.backend.util.mask import to_standard_float_mask
+from invokeai.backend.util.silence_warnings import SilenceWarnings
+
+
+def get_scheduler(
+    context: InvocationContext,
+    scheduler_info: ModelIdentifierField,
+    scheduler_name: str,
+    seed: int,
+) -> Scheduler:
+    """Load a scheduler and apply some scheduler-specific overrides."""
+    # TODO(ryand): Silently falling back to ddim seems like a bad idea. Look into why this was added and remove if
+    # possible.
+    scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP["ddim"])
+    orig_scheduler_info = context.models.load(scheduler_info)
+    with orig_scheduler_info as orig_scheduler:
+        scheduler_config = orig_scheduler.config
+
+    if "_backup" in scheduler_config:
+        scheduler_config = scheduler_config["_backup"]
+    scheduler_config = {
+        **scheduler_config,
+        **scheduler_extra_config,  # FIXME
+        "_backup": scheduler_config,
+    }
+
+    # make dpmpp_sde reproducible (the seed can only be passed in the initializer)
+    if scheduler_class is DPMSolverSDEScheduler:
+        scheduler_config["noise_sampler_seed"] = seed
+
+    scheduler = scheduler_class.from_config(scheduler_config)
+
+    # hack copied over from generate.py
+    if not hasattr(scheduler, "uses_inpainting_model"):
+        scheduler.uses_inpainting_model = lambda: False
+    assert isinstance(scheduler, Scheduler)
+    return scheduler
+
+
+@invocation(
+    "denoise_latents",
+    title="Denoise Latents",
+    tags=["latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
+    category="latents",
+    version="1.5.3",
+)
+class DenoiseLatentsInvocation(BaseInvocation):
+    """Denoises noisy latents to decodable images"""
+
+    positive_conditioning: Union[ConditioningField, list[ConditioningField]] = InputField(
+        description=FieldDescriptions.positive_cond, input=Input.Connection, ui_order=0
+    )
+    negative_conditioning: Union[ConditioningField, list[ConditioningField]] = InputField(
+        description=FieldDescriptions.negative_cond, input=Input.Connection, ui_order=1
+    )
+    noise: Optional[LatentsField] = InputField(
+        default=None,
+        description=FieldDescriptions.noise,
+        input=Input.Connection,
+        ui_order=3,
+    )
+    steps: int = InputField(default=10, gt=0, description=FieldDescriptions.steps)
+    cfg_scale: Union[float, List[float]] = InputField(
+        default=7.5, description=FieldDescriptions.cfg_scale, title="CFG Scale"
+    )
+    denoising_start: float = InputField(
+        default=0.0,
+        ge=0,
+        le=1,
+        description=FieldDescriptions.denoising_start,
+    )
+    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
+    scheduler: SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description=FieldDescriptions.scheduler,
+        ui_type=UIType.Scheduler,
+    )
+    unet: UNetField = InputField(
+        description=FieldDescriptions.unet,
+        input=Input.Connection,
+        title="UNet",
+        ui_order=2,
+    )
+    control: Optional[Union[ControlField, list[ControlField]]] = InputField(
+        default=None,
+        input=Input.Connection,
+        ui_order=5,
+    )
+    ip_adapter: Optional[Union[IPAdapterField, list[IPAdapterField]]] = InputField(
+        description=FieldDescriptions.ip_adapter,
+        title="IP-Adapter",
+        default=None,
+        input=Input.Connection,
+        ui_order=6,
+    )
+    t2i_adapter: Optional[Union[T2IAdapterField, list[T2IAdapterField]]] = InputField(
+        description=FieldDescriptions.t2i_adapter,
+        title="T2I-Adapter",
+        default=None,
+        input=Input.Connection,
+        ui_order=7,
+    )
+    cfg_rescale_multiplier: float = InputField(
+        title="CFG Rescale Multiplier", default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier
+    )
+    latents: Optional[LatentsField] = InputField(
+        default=None,
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+        ui_order=4,
+    )
+    denoise_mask: Optional[DenoiseMaskField] = InputField(
+        default=None,
+        description=FieldDescriptions.mask,
+        input=Input.Connection,
+        ui_order=8,
+    )
+
+    @field_validator("cfg_scale")
+    def ge_one(cls, v: Union[List[float], float]) -> Union[List[float], float]:
+        """Validate that every cfg_scale value (a scalar, or each element of a list) is >= 1."""
+        if isinstance(v, list):
+            for i in v:
+                if i < 1:
+                    raise ValueError("cfg_scale must be greater than or equal to 1")
+        else:
+            if v < 1:
+                raise ValueError("cfg_scale must be greater than or equal to 1")
+        return v
+
+    @staticmethod
+    def _get_text_embeddings_and_masks(
+        cond_list: list[ConditioningField],
+        context: InvocationContext,
+        device: torch.device,
+        dtype: torch.dtype,
+    ) -> tuple[Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]], list[Optional[torch.Tensor]]]:
+        """Load the text embedding and optional region mask for every conditioning field.
+
+        Returns:
+            Two parallel lists: the first conditioning of each loaded entry (moved to `device`/`dtype`), and the
+            matching mask tensor, or None where the field carries no mask.
+        """
+        embeddings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]] = []
+        region_masks: list[Optional[torch.Tensor]] = []
+        for cond_field in cond_list:
+            loaded = context.conditioning.load(cond_field.conditioning_name)
+            embeddings.append(loaded.conditionings[0].to(device=device, dtype=dtype))
+
+            # Masks are referenced by tensor name; prompts without a mask keep a None placeholder.
+            mask_field = cond_field.mask
+            region_masks.append(None if mask_field is None else context.tensors.load(mask_field.tensor_name))
+
+        return embeddings, region_masks
+
+    @staticmethod
+    def _preprocess_regional_prompt_mask(
+        mask: Optional[torch.Tensor], target_height: int, target_width: int, dtype: torch.dtype
+    ) -> torch.Tensor:
+        """Bring a regional prompt mask to the target spatial size.
+
+        A missing mask (None) becomes an all-ones mask. Otherwise the mask is converted with
+        `to_standard_float_mask` and resized using 'nearest' interpolation.
+
+        Returns:
+            torch.Tensor: The processed mask. shape: (1, 1, target_height, target_width).
+        """
+        target_size = (target_height, target_width)
+        if mask is None:
+            return torch.ones((1, 1, *target_size), dtype=dtype)
+
+        float_mask = to_standard_float_mask(mask, out_dtype=dtype)
+
+        # torchvision expects shape (batch, channels, h, w), so add a batch dimension: (1, h, w) -> (1, 1, h, w).
+        batched_mask = float_mask.unsqueeze(0)
+        resize = torchvision.transforms.Resize(
+            target_size, interpolation=torchvision.transforms.InterpolationMode.NEAREST
+        )
+        return resize(batched_mask)
+
+    @staticmethod
+    def _concat_regional_text_embeddings(
+        text_conditionings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]],
+        masks: Optional[list[Optional[torch.Tensor]]],
+        latent_height: int,
+        latent_width: int,
+        dtype: torch.dtype,
+    ) -> tuple[Union[BasicConditioningInfo, SDXLConditioningInfo], Optional[TextConditioningRegions]]:
+        """Concatenate regional text embeddings into a single embedding and track the region masks accordingly.
+
+        Args:
+            text_conditionings: One conditioning per prompt. The type of the first entry decides whether the result
+                is built as SDXL conditioning. Must be non-empty (the first entry is indexed unconditionally).
+            masks: One optional mask per prompt, or None to treat every prompt as unmasked.
+            latent_height: Height that region masks are resized to.
+            latent_width: Width that region masks are resized to.
+            dtype: dtype used for the processed masks.
+
+        Returns:
+            A tuple of the combined conditioning (embeds concatenated along dim 1) and the region info. The region
+            info is None when no prompt has a mask.
+        """
+        if masks is None:
+            masks = [None] * len(text_conditionings)
+        assert len(text_conditionings) == len(masks)
+
+        is_sdxl = type(text_conditionings[0]) is SDXLConditioningInfo
+
+        all_masks_are_none = all(mask is None for mask in masks)
+
+        text_embedding = []
+        pooled_embedding = None
+        add_time_ids = None
+        # Running offset (along dim 1) of the next prompt's tokens within the concatenated embedding.
+        cur_text_embedding_len = 0
+        processed_masks = []
+        embedding_ranges = []
+
+        for prompt_idx, text_embedding_info in enumerate(text_conditionings):
+            mask = masks[prompt_idx]
+
+            if is_sdxl:
+                # We choose a random SDXLConditioningInfo's pooled_embeds and add_time_ids here, with a preference for
+                # prompts without a mask. We prefer prompts without a mask, because they are more likely to contain
+                # global prompt information.  In an ideal case, there should be exactly one global prompt without a
+                # mask, but we don't enforce this.
+                #
+                # Concretely: the first prompt seeds both values, and every later prompt without a mask overwrites
+                # them, so the last unmasked prompt wins.
+
+                # HACK(ryand): The fact that we have to choose a single pooled_embedding and add_time_ids here is a
+                # fundamental interface issue. The SDXL Compel nodes are not designed to be used in the way that we use
+                # them for regional prompting. Ideally, the DenoiseLatents invocation should accept a single
+                # pooled_embeds tensor and a list of standard text embeds with region masks. This change would be a
+                # pretty major breaking change to a popular node, so for now we use this hack.
+                if pooled_embedding is None or mask is None:
+                    pooled_embedding = text_embedding_info.pooled_embeds
+                if add_time_ids is None or mask is None:
+                    add_time_ids = text_embedding_info.add_time_ids
+
+            text_embedding.append(text_embedding_info.embeds)
+            if not all_masks_are_none:
+                # Record which slice of the concatenated embedding belongs to this prompt, together with its mask.
+                # A prompt without a mask gets an all-ones mask from _preprocess_regional_prompt_mask.
+                embedding_ranges.append(
+                    Range(
+                        start=cur_text_embedding_len, end=cur_text_embedding_len + text_embedding_info.embeds.shape[1]
+                    )
+                )
+                processed_masks.append(
+                    DenoiseLatentsInvocation._preprocess_regional_prompt_mask(
+                        mask, latent_height, latent_width, dtype=dtype
+                    )
+                )
+
+            cur_text_embedding_len += text_embedding_info.embeds.shape[1]
+
+        text_embedding = torch.cat(text_embedding, dim=1)
+        assert len(text_embedding.shape) == 3  # batch_size, seq_len, token_len
+
+        regions = None
+        if not all_masks_are_none:
+            # Each processed mask has shape (1, 1, h, w); stack them along dim 1, one channel per prompt.
+            regions = TextConditioningRegions(
+                masks=torch.cat(processed_masks, dim=1),
+                ranges=embedding_ranges,
+            )
+
+        if is_sdxl:
+            return (
+                SDXLConditioningInfo(embeds=text_embedding, pooled_embeds=pooled_embedding, add_time_ids=add_time_ids),
+                regions,
+            )
+        return BasicConditioningInfo(embeds=text_embedding), regions
+
+    @staticmethod
+    def get_conditioning_data(
+        context: InvocationContext,
+        positive_conditioning_field: Union[ConditioningField, list[ConditioningField]],
+        negative_conditioning_field: Union[ConditioningField, list[ConditioningField]],
+        unet: UNet2DConditionModel,
+        latent_height: int,
+        latent_width: int,
+        cfg_scale: float | list[float],
+        steps: int,
+        cfg_rescale_multiplier: float,
+    ) -> TextConditioningData:
+        """Build the `TextConditioningData` for a denoising run from the positive and negative conditioning inputs.
+
+        Each conditioning input may be a single field or a list of fields. The loaded embeddings are placed on the
+        UNet's device and dtype and merged with `_concat_regional_text_embeddings`. A list-valued `cfg_scale` must
+        contain exactly `steps` entries.
+        """
+        # Accept a single field or a list of fields for either input.
+        positive_fields = (
+            positive_conditioning_field
+            if isinstance(positive_conditioning_field, list)
+            else [positive_conditioning_field]
+        )
+        negative_fields = (
+            negative_conditioning_field
+            if isinstance(negative_conditioning_field, list)
+            else [negative_conditioning_field]
+        )
+
+        positive_embeds, positive_masks = DenoiseLatentsInvocation._get_text_embeddings_and_masks(
+            positive_fields, context, unet.device, unet.dtype
+        )
+        negative_embeds, negative_masks = DenoiseLatentsInvocation._get_text_embeddings_and_masks(
+            negative_fields, context, unet.device, unet.dtype
+        )
+
+        positive_text, positive_regions = DenoiseLatentsInvocation._concat_regional_text_embeddings(
+            text_conditionings=positive_embeds,
+            masks=positive_masks,
+            latent_height=latent_height,
+            latent_width=latent_width,
+            dtype=unet.dtype,
+        )
+        negative_text, negative_regions = DenoiseLatentsInvocation._concat_regional_text_embeddings(
+            text_conditionings=negative_embeds,
+            masks=negative_masks,
+            latent_height=latent_height,
+            latent_width=latent_width,
+            dtype=unet.dtype,
+        )
+
+        if isinstance(cfg_scale, list):
+            assert len(cfg_scale) == steps, "cfg_scale (list) must have the same length as the number of steps"
+
+        return TextConditioningData(
+            uncond_text=negative_text,
+            cond_text=positive_text,
+            uncond_regions=negative_regions,
+            cond_regions=positive_regions,
+            guidance_scale=cfg_scale,
+            guidance_rescale_multiplier=cfg_rescale_multiplier,
+        )
+
+    @staticmethod
+    def create_pipeline(
+        unet: UNet2DConditionModel,
+        scheduler: Scheduler,
+    ) -> StableDiffusionGeneratorPipeline:
+        """Wrap `unet` and `scheduler` in a `StableDiffusionGeneratorPipeline`.
+
+        Only the UNet and scheduler are real; the text encoder, tokenizer, safety checker and feature extractor are
+        passed as None, and the VAE is replaced by a minimal stand-in object.
+        """
+
+        # Minimal VAE stand-in that only exposes `config.block_out_channels`.
+        # NOTE(review): presumably the pipeline constructor reads this attribute - confirm.
+        class FakeVae:
+            class FakeVaeConfig:
+                def __init__(self) -> None:
+                    self.block_out_channels = [0]
+
+            def __init__(self) -> None:
+                self.config = FakeVae.FakeVaeConfig()
+
+        return StableDiffusionGeneratorPipeline(
+            vae=FakeVae(),  # TODO: oh...
+            text_encoder=None,
+            tokenizer=None,
+            unet=unet,
+            scheduler=scheduler,
+            safety_checker=None,
+            feature_extractor=None,
+            requires_safety_checker=False,
+        )
+
+    @staticmethod
+    def prep_control_data(
+        context: InvocationContext,
+        control_input: ControlField | list[ControlField] | None,
+        latents_shape: List[int],
+        exit_stack: ExitStack,
+        do_classifier_free_guidance: bool = True,
+    ) -> list[ControlNetData] | None:
+        """Load every ControlNet model and prepare its control image.
+
+        Each ControlNet model is entered on `exit_stack`, so it stays loaded until the caller closes the stack.
+
+        Returns:
+            One `ControlNetData` per control field, or None when there is no control input.
+
+        Raises:
+            ValueError: If `control_input` is not a ControlField, a list, or None.
+        """
+        # Normalize control_input to a list, bailing out early when there is nothing to prepare.
+        if control_input is None:
+            return None
+        control_fields: list[ControlField]
+        if isinstance(control_input, ControlField):
+            control_fields = [control_input]
+        elif isinstance(control_input, list):
+            control_fields = control_input
+        else:
+            raise ValueError(f"Unexpected control_input type: {type(control_input)}")
+        if not control_fields:
+            return None
+
+        # Control images are resized to the pixel-space size, assuming a fixed LATENT_SCALE_FACTOR.
+        _, _, latent_height, latent_width = latents_shape
+        target_height = latent_height * LATENT_SCALE_FACTOR
+        target_width = latent_width * LATENT_SCALE_FACTOR
+
+        prepared: list[ControlNetData] = []
+        for field in control_fields:
+            model = exit_stack.enter_context(context.models.load(field.control_model))
+            assert isinstance(model, ControlNetModel)
+
+            pil_image = context.images.get_pil(field.image.image_name)
+            # FIXME: still need to test with different widths, heights, devices, dtypes
+            #        and add in batch_size, num_images_per_prompt?
+            #        and do real check for classifier_free_guidance?
+            # prepare_control_image should return torch.Tensor of shape(batch_size, 3, height, width)
+            image_tensor = prepare_control_image(
+                image=pil_image,
+                do_classifier_free_guidance=do_classifier_free_guidance,
+                width=target_width,
+                height=target_height,
+                device=model.device,
+                dtype=model.dtype,
+                control_mode=field.control_mode,
+                resize_mode=field.resize_mode,
+            )
+            prepared.append(
+                ControlNetData(
+                    model=model,
+                    image_tensor=image_tensor,
+                    weight=field.control_weight,
+                    begin_step_percent=field.begin_step_percent,
+                    end_step_percent=field.end_step_percent,
+                    control_mode=field.control_mode,
+                    # Resizing currently happens in prepare_control_image(); resize_mode is carried along on
+                    # ControlNetData in case it is needed later.
+                    resize_mode=field.resize_mode,
+                )
+            )
+
+        return prepared
+
+    def prep_ip_adapter_image_prompts(
+        self,
+        context: InvocationContext,
+        ip_adapters: List[IPAdapterField],
+    ) -> List[Tuple[torch.Tensor, torch.Tensor]]:
+        """Compute the image prompt embeddings for each IP-Adapter using its CLIP vision image encoder.
+
+        Returns:
+            One `(image_prompt_embeds, uncond_image_prompt_embeds)` pair per entry of `ip_adapters`, in order.
+        """
+        embeds_per_adapter = []
+        for adapter_field in ip_adapters:
+            with context.models.load(adapter_field.ip_adapter_model) as ip_adapter_model:
+                assert isinstance(ip_adapter_model, IPAdapter)
+                encoder_info = context.models.load(adapter_field.image_encoder_model)
+
+                # The adapter's `image` may be a single ImageField or a list of them.
+                raw_images = adapter_field.image
+                image_fields = raw_images if isinstance(raw_images, list) else [raw_images]
+                pil_images = [context.images.get_pil(image_field.image_name) for image_field in image_fields]
+
+                with encoder_info as image_encoder_model:
+                    assert isinstance(image_encoder_model, CLIPVisionModelWithProjection)
+                    # Embeddings come from CLIP followed by the adapter's ImageProjModel.
+                    cond_embeds, uncond_embeds = ip_adapter_model.get_image_embeds(pil_images, image_encoder_model)
+                    embeds_per_adapter.append((cond_embeds, uncond_embeds))
+
+        return embeds_per_adapter
+
+    def prep_ip_adapter_data(
+        self,
+        context: InvocationContext,
+        ip_adapters: List[IPAdapterField],
+        image_prompts: List[Tuple[torch.Tensor, torch.Tensor]],
+        exit_stack: ExitStack,
+        latent_height: int,
+        latent_width: int,
+        dtype: torch.dtype,
+    ) -> Optional[List[IPAdapterData]]:
+        """Load each IP-Adapter model and pair it with its precomputed image prompt embeddings and region mask.
+
+        `ip_adapters` and `image_prompts` must have the same length (zipped with `strict=True`). Models are entered
+        on `exit_stack`, so they stay loaded until the caller closes the stack.
+
+        Returns:
+            One `IPAdapterData` per adapter, or None when there are no adapters.
+        """
+        prepared = []
+        for adapter_field, (cond_embeds, uncond_embeds) in zip(ip_adapters, image_prompts, strict=True):
+            loaded_model = exit_stack.enter_context(context.models.load(adapter_field.ip_adapter_model))
+
+            # Adapters without a mask end up with the all-ones mask produced by the preprocessing helper.
+            region_mask = None
+            if adapter_field.mask is not None:
+                region_mask = context.tensors.load(adapter_field.mask.tensor_name)
+            region_mask = self._preprocess_regional_prompt_mask(region_mask, latent_height, latent_width, dtype=dtype)
+
+            prepared.append(
+                IPAdapterData(
+                    ip_adapter_model=loaded_model,
+                    weight=adapter_field.weight,
+                    target_blocks=adapter_field.target_blocks,
+                    begin_step_percent=adapter_field.begin_step_percent,
+                    end_step_percent=adapter_field.end_step_percent,
+                    ip_adapter_conditioning=IPAdapterConditioningInfo(cond_embeds, uncond_embeds),
+                    mask=region_mask,
+                )
+            )
+
+        return prepared or None
+
+    def run_t2i_adapters(
+        self,
+        context: InvocationContext,
+        t2i_adapter: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
+        latents_shape: list[int],
+        do_classifier_free_guidance: bool,
+    ) -> Optional[list[T2IAdapterData]]:
+        """Run every T2I-Adapter model on its input image and collect the resulting adapter states.
+
+        Args:
+            context: Invocation context used to look up model configs, load models and fetch images.
+            t2i_adapter: A single adapter field, a list of them, or None.
+            latents_shape: Shape of the latents; indices 2 and 3 are used as height and width.
+            do_classifier_free_guidance: If True, each adapter state tensor is duplicated along dim 0.
+
+        Returns:
+            One `T2IAdapterData` per adapter, or None when no adapters were supplied.
+
+        Raises:
+            ValueError: If an adapter's base model is neither StableDiffusion1 nor StableDiffusionXL.
+        """
+        if t2i_adapter is None:
+            return None
+
+        # Handle the possibility that t2i_adapter could be a list or a single T2IAdapterField.
+        if isinstance(t2i_adapter, T2IAdapterField):
+            t2i_adapter = [t2i_adapter]
+
+        if len(t2i_adapter) == 0:
+            return None
+
+        t2i_adapter_data = []
+        for t2i_adapter_field in t2i_adapter:
+            t2i_adapter_model_config = context.models.get_config(t2i_adapter_field.t2i_adapter_model.key)
+            t2i_adapter_loaded_model = context.models.load(t2i_adapter_field.t2i_adapter_model)
+            image = context.images.get_pil(t2i_adapter_field.image.image_name)
+
+            # The max_unet_downscale is the maximum amount that the UNet model downscales the latent image internally.
+            if t2i_adapter_model_config.base == BaseModelType.StableDiffusion1:
+                max_unet_downscale = 8
+            elif t2i_adapter_model_config.base == BaseModelType.StableDiffusionXL:
+                max_unet_downscale = 4
+            else:
+                raise ValueError(f"Unexpected T2I-Adapter base model type: '{t2i_adapter_model_config.base}'.")
+
+            t2i_adapter_model: T2IAdapter
+            with t2i_adapter_loaded_model as t2i_adapter_model:
+                total_downscale_factor = t2i_adapter_model.total_downscale_factor
+
+                # Resize the T2I-Adapter input image.
+                # We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
+                # result will match the latent image's dimensions after max_unet_downscale is applied.
+                t2i_input_height = latents_shape[2] // max_unet_downscale * total_downscale_factor
+                t2i_input_width = latents_shape[3] // max_unet_downscale * total_downscale_factor
+
+                # Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
+                # a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
+                # T2I-Adapter model.
+                #
+                # Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
+                # of the same requirements (e.g. preserving binary masks during resize).
+                t2i_image = prepare_control_image(
+                    image=image,
+                    do_classifier_free_guidance=False,
+                    width=t2i_input_width,
+                    height=t2i_input_height,
+                    num_channels=t2i_adapter_model.config["in_channels"],  # mypy treats this as a FrozenDict
+                    device=t2i_adapter_model.device,
+                    dtype=t2i_adapter_model.dtype,
+                    resize_mode=t2i_adapter_field.resize_mode,
+                )
+
+                adapter_state = t2i_adapter_model(t2i_image)
+
+            # The adapter model context has been exited at this point; only its output tensors are kept.
+            if do_classifier_free_guidance:
+                # Duplicate each state tensor along the batch dimension (replaced in place in adapter_state).
+                for idx, value in enumerate(adapter_state):
+                    adapter_state[idx] = torch.cat([value] * 2, dim=0)
+
+            t2i_adapter_data.append(
+                T2IAdapterData(
+                    adapter_state=adapter_state,
+                    weight=t2i_adapter_field.weight,
+                    begin_step_percent=t2i_adapter_field.begin_step_percent,
+                    end_step_percent=t2i_adapter_field.end_step_percent,
+                )
+            )
+
+        return t2i_adapter_data
+
+    # original idea by https://github.com/AmericanPresidentJimmyCarter
+    # TODO: research more for second order schedulers timesteps
+    @staticmethod
+    def init_scheduler(
+        scheduler: Union[Scheduler, ConfigMixin],
+        device: torch.device,
+        steps: int,
+        denoising_start: float,
+        denoising_end: float,
+        seed: int,
+    ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any]]:
+        """Configure the scheduler's timesteps and select the sub-range to denoise.
+
+        Args:
+            scheduler: The scheduler to configure. Must be a `ConfigMixin`.
+            device: Device for the timesteps and for the step generator.
+            steps: Number of steps passed to `scheduler.set_timesteps`.
+            denoising_start: Fraction used to compute the starting timestep threshold.
+            denoising_end: Fraction used to compute the ending timestep threshold.
+            seed: Seed for the generator handed to `scheduler.step` (used with all bits flipped).
+
+        Returns:
+            A tuple `(timesteps, init_timestep, scheduler_step_kwargs)`: the selected slice of timesteps, a slice
+            holding at most the first selected timestep, and extra keyword arguments for `scheduler.step`.
+        """
+        assert isinstance(scheduler, ConfigMixin)
+        # Schedulers flagged `cpu_only` build their timesteps on the CPU; the result is then moved to `device`.
+        if scheduler.config.get("cpu_only", False):
+            scheduler.set_timesteps(steps, device="cpu")
+            timesteps = scheduler.timesteps.to(device=device)
+        else:
+            scheduler.set_timesteps(steps, device=device)
+            timesteps = scheduler.timesteps
+
+        # skip greater order timesteps
+        _timesteps = timesteps[:: scheduler.order]
+
+        # get start timestep index
+        # (the number of timesteps at or above the start threshold, i.e. how many to skip from the front)
+        t_start_val = int(round(scheduler.config["num_train_timesteps"] * (1 - denoising_start)))
+        t_start_idx = len(list(filter(lambda ts: ts >= t_start_val, _timesteps)))
+
+        # get end timestep index
+        # (counted relative to t_start_idx, so this is a length rather than an absolute index)
+        t_end_val = int(round(scheduler.config["num_train_timesteps"] * (1 - denoising_end)))
+        t_end_idx = len(list(filter(lambda ts: ts >= t_end_val, _timesteps[t_start_idx:])))
+
+        # apply order to indexes
+        t_start_idx *= scheduler.order
+        t_end_idx *= scheduler.order
+
+        # Slicing (rather than indexing) keeps init_timestep a 1-D tensor; it is empty if t_start_idx is out of range.
+        init_timestep = timesteps[t_start_idx : t_start_idx + 1]
+        timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
+
+        scheduler_step_kwargs: Dict[str, Any] = {}
+        scheduler_step_signature = inspect.signature(scheduler.step)
+        if "generator" in scheduler_step_signature.parameters:
+            # At some point, someone decided that schedulers that accept a generator should use the original seed with
+            # all bits flipped. I don't know the original rationale for this, but now we must keep it like this for
+            # reproducibility.
+            #
+            # These Invoke-supported schedulers accept a generator as of 2024-06-04:
+            #   - DDIMScheduler
+            #   - DDPMScheduler
+            #   - DPMSolverMultistepScheduler
+            #   - EulerAncestralDiscreteScheduler
+            #   - EulerDiscreteScheduler
+            #   - KDPM2AncestralDiscreteScheduler
+            #   - LCMScheduler
+            #   - TCDScheduler
+            scheduler_step_kwargs.update({"generator": torch.Generator(device=device).manual_seed(seed ^ 0xFFFFFFFF)})
+        if isinstance(scheduler, TCDScheduler):
+            scheduler_step_kwargs.update({"eta": 1.0})
+
+        return timesteps, init_timestep, scheduler_step_kwargs
+
+    def prep_inpaint_mask(
+        self, context: InvocationContext, latents: torch.Tensor
+    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], bool]:
+        """Load the denoise mask, resize it to the latents' spatial size and prepare the masked latents.
+
+        Returns:
+            `(1 - mask, masked_latents, gradient)`, or `(None, None, False)` when no denoise mask is set.
+        """
+        denoise_mask = self.denoise_mask
+        if denoise_mask is None:
+            return None, None, False
+
+        resized_mask = tv_resize(
+            context.tensors.load(denoise_mask.mask_name),
+            latents.shape[-2:],
+            T.InterpolationMode.BILINEAR,
+            antialias=False,
+        )
+
+        if denoise_mask.masked_latents_name is None:
+            # No stored masked latents: zero the latents wherever the resized mask is below 0.5.
+            masked_latents = torch.where(resized_mask < 0.5, 0.0, latents)
+        else:
+            masked_latents = context.tensors.load(denoise_mask.masked_latents_name)
+
+        return 1 - resized_mask, masked_latents, denoise_mask.gradient
+
+    @staticmethod
+    def prepare_noise_and_latents(
+        context: InvocationContext, noise_field: LatentsField | None, latents_field: LatentsField | None
+    ) -> Tuple[int, torch.Tensor | None, torch.Tensor]:
+        """Depending on the workflow, we expect different combinations of noise and latents to be provided. This
+        function handles preparing these values accordingly.
+
+        Expected workflows:
+        - Text-to-Image Denoising: `noise` is provided, `latents` is not. `latents` is initialized to zeros.
+        - Image-to-Image Denoising: `noise` and `latents` are both provided.
+        - Text-to-Image SDXL Refiner Denoising: `latents` is provided, `noise` is not.
+        - Image-to-Image SDXL Refiner Denoising: `latents` is provided, `noise` is not.
+
+        NOTE(ryand): I wrote this docstring, but I am not the original author of this code. There may be other workflows
+        I haven't considered.
+
+        Returns:
+            A tuple `(seed, noise, latents)`. `noise` is None when no noise field was given.
+
+        Raises:
+            ValueError: If neither field is provided, or if the noise and latents shapes differ beyond dim 0.
+        """
+        noise = None
+        if noise_field is not None:
+            noise = context.tensors.load(noise_field.latents_name)
+
+        if latents_field is not None:
+            latents = context.tensors.load(latents_field.latents_name)
+        elif noise is not None:
+            latents = torch.zeros_like(noise)
+        else:
+            raise ValueError("'latents' or 'noise' must be provided!")
+
+        # Only the non-batch dimensions have to agree.
+        if noise is not None and noise.shape[1:] != latents.shape[1:]:
+            raise ValueError(f"Incompatible 'noise' and 'latents' shapes: {latents.shape=} {noise.shape=}")
+
+        # The seed comes from (in order of priority): the noise field, the latents field, or 0.
+        if noise_field is not None and noise_field.seed is not None:
+            seed = noise_field.seed
+        elif latents_field is not None and latents_field.seed is not None:
+            seed = latents_field.seed
+        else:
+            seed = 0
+
+        return seed, noise, latents
+
+    @torch.no_grad()
+    @SilenceWarnings()  # This quenches the NSFW nag from diffusers.
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        """Run the denoising process and return the resulting latents.
+
+        Steps, in order:
+        1. Prepare the seed, noise, latents and inpaint mask.
+        2. Run the T2I-Adapters and the IP-Adapter image encoders before the UNet is loaded.
+        3. Load the UNet with the FreeU, seamless and LoRA patches applied.
+        4. Build the scheduler, pipeline, conditioning, ControlNet and IP-Adapter data.
+        5. Call `pipeline.latents_from_embeddings`, move the result to the CPU and save it.
+
+        The returned `LatentsOutput` is built with `seed=None`.
+        """
+        seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
+
+        mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+
+        # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
+        # below. Investigate whether this is appropriate.
+        t2i_adapter_data = self.run_t2i_adapters(
+            context,
+            self.t2i_adapter,
+            latents.shape,
+            do_classifier_free_guidance=True,
+        )
+
+        ip_adapters: List[IPAdapterField] = []
+        if self.ip_adapter is not None:
+            # ip_adapter could be a list or a single IPAdapterField. Normalize to a list here.
+            if isinstance(self.ip_adapter, list):
+                ip_adapters = self.ip_adapter
+            else:
+                ip_adapters = [self.ip_adapter]
+
+        # If there are IP adapters, the following line runs the adapters' CLIPVision image encoders to return
+        # a series of image conditioning embeddings. This is being done here rather than in the
+        # big model context below in order to use less VRAM on low-VRAM systems.
+        # The image prompts are then passed to prep_ip_adapter_data().
+        image_prompts = self.prep_ip_adapter_image_prompts(context=context, ip_adapters=ip_adapters)
+
+        # get the unet's config so that we can pass the base to dispatch_progress()
+        unet_config = context.models.get_config(self.unet.unet.key)
+
+        def step_callback(state: PipelineIntermediateState) -> None:
+            context.util.sd_step_callback(state, unet_config.base)
+
+        # Yields (model, weight) pairs one LoRA at a time; each loaded-model handle is dropped right after its yield.
+        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+            for lora in self.unet.loras:
+                lora_info = context.models.load(lora.lora)
+                assert isinstance(lora_info.model, LoRAModelRaw)
+                yield (lora_info.model, lora.weight)
+                del lora_info
+            return
+
+        unet_info = context.models.load(self.unet.unet)
+        assert isinstance(unet_info.model, UNet2DConditionModel)
+        # exit_stack is entered first, so the ControlNet / IP-Adapter models registered on it below are released last.
+        with (
+            ExitStack() as exit_stack,
+            unet_info.model_on_device() as (model_state_dict, unet),
+            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
+            set_seamless(unet, self.unet.seamless_axes),  # FIXME
+            # Apply the LoRA after unet has been moved to its target device for faster patching.
+            ModelPatcher.apply_lora_unet(
+                unet,
+                loras=_lora_loader(),
+                model_state_dict=model_state_dict,
+            ),
+        ):
+            assert isinstance(unet, UNet2DConditionModel)
+            # Move every tensor input to the UNet's device and dtype.
+            latents = latents.to(device=unet.device, dtype=unet.dtype)
+            if noise is not None:
+                noise = noise.to(device=unet.device, dtype=unet.dtype)
+            if mask is not None:
+                mask = mask.to(device=unet.device, dtype=unet.dtype)
+            if masked_latents is not None:
+                masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)
+
+            scheduler = get_scheduler(
+                context=context,
+                scheduler_info=self.unet.scheduler,
+                scheduler_name=self.scheduler,
+                seed=seed,
+            )
+
+            pipeline = self.create_pipeline(unet, scheduler)
+
+            _, _, latent_height, latent_width = latents.shape
+            conditioning_data = self.get_conditioning_data(
+                context=context,
+                positive_conditioning_field=self.positive_conditioning,
+                negative_conditioning_field=self.negative_conditioning,
+                unet=unet,
+                latent_height=latent_height,
+                latent_width=latent_width,
+                cfg_scale=self.cfg_scale,
+                steps=self.steps,
+                cfg_rescale_multiplier=self.cfg_rescale_multiplier,
+            )
+
+            controlnet_data = self.prep_control_data(
+                context=context,
+                control_input=self.control,
+                latents_shape=latents.shape,
+                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                do_classifier_free_guidance=True,
+                exit_stack=exit_stack,
+            )
+
+            ip_adapter_data = self.prep_ip_adapter_data(
+                context=context,
+                ip_adapters=ip_adapters,
+                image_prompts=image_prompts,
+                exit_stack=exit_stack,
+                latent_height=latent_height,
+                latent_width=latent_width,
+                dtype=unet.dtype,
+            )
+
+            timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
+                scheduler,
+                device=unet.device,
+                steps=self.steps,
+                denoising_start=self.denoising_start,
+                denoising_end=self.denoising_end,
+                seed=seed,
+            )
+
+            result_latents = pipeline.latents_from_embeddings(
+                latents=latents,
+                timesteps=timesteps,
+                init_timestep=init_timestep,
+                noise=noise,
+                seed=seed,
+                mask=mask,
+                masked_latents=masked_latents,
+                is_gradient_mask=gradient_mask,
+                scheduler_step_kwargs=scheduler_step_kwargs,
+                conditioning_data=conditioning_data,
+                control_data=controlnet_data,
+                ip_adapter_data=ip_adapter_data,
+                t2i_adapter_data=t2i_adapter_data,
+                callback=step_callback,
+            )
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        result_latents = result_latents.to("cpu")
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=result_latents)
+        return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
--- a/invokeai/app/invocations/ideal_size.py
+++ b/invokeai/app/invocations/ideal_size.py
@@ -0,0 +1,65 @@
+import math
+from typing import Tuple
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
+from invokeai.app.invocations.model import UNetField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.config import BaseModelType
+
+
+@invocation_output("ideal_size_output")
+class IdealSizeOutput(BaseInvocationOutput):
+    """Output of the Ideal Size node: the computed generation width and height."""
+
+    width: int = OutputField(description="The ideal width of the image (in pixels)")
+    height: int = OutputField(description="The ideal height of the image (in pixels)")
+
+
+@invocation(
+    "ideal_size",
+    title="Ideal Size",
+    tags=["latents", "math", "ideal_size"],
+    version="1.0.3",
+)
+class IdealSizeInvocation(BaseInvocation):
+    """Calculates the ideal size for generation to avoid duplication"""
+
+    # Both dimensions must be positive: invoke() divides by height and by the resulting aspect ratio.
+    width: int = InputField(default=1024, gt=0, description="Final image width")
+    height: int = InputField(default=576, gt=0, description="Final image height")
+    # NOTE(review): the default is None although invoke() dereferences self.unet.unet - confirm a UNet is always set.
+    unet: UNetField = InputField(default=None, description=FieldDescriptions.unet)
+    multiplier: float = InputField(
+        default=1.0,
+        description="Amount to multiply the model's dimensions by when calculating the ideal size (may result in "
+        "initial generation artifacts if too large)",
+    )
+
+    def trim_to_multiple_of(self, *args: int, multiple_of: int = LATENT_SCALE_FACTOR) -> Tuple[int, ...]:
+        """Round each value in `args` down to the nearest multiple of `multiple_of`."""
+        return tuple((x - x % multiple_of) for x in args)
+
+    def invoke(self, context: InvocationContext) -> IdealSizeOutput:
+        """Compute a size with the requested aspect ratio and an area close to the model's base resolution.
+
+        The base dimension is 512, or 768 / 1024 for StableDiffusion2 / StableDiffusionXL UNets, scaled by
+        `multiplier`. Before rounding, the shorter side is clamped to at least half of that dimension; both sides
+        are then floored and trimmed down to a multiple of LATENT_SCALE_FACTOR.
+        """
+        unet_config = context.models.get_config(self.unet.unet.key)
+        aspect = self.width / self.height
+        dimension: float = 512
+        if unet_config.base == BaseModelType.StableDiffusion2:
+            dimension = 768
+        elif unet_config.base == BaseModelType.StableDiffusionXL:
+            dimension = 1024
+        dimension = dimension * self.multiplier
+        min_dimension = math.floor(dimension * 0.5)
+        model_area = dimension * dimension  # hardcoded for now since all models are trained on square images
+
+        if aspect > 1.0:
+            # Landscape: height is the shorter side, so clamp it and derive the width from the aspect ratio.
+            init_height = max(min_dimension, math.sqrt(model_area / aspect))
+            init_width = init_height * aspect
+        else:
+            # Portrait or square: width is the shorter (or equal) side, so clamp it and derive the height.
+            init_width = max(min_dimension, math.sqrt(model_area * aspect))
+            init_height = init_width / aspect
+
+        scaled_width, scaled_height = self.trim_to_multiple_of(
+            math.floor(init_width),
+            math.floor(init_height),
+        )
+
+        return IdealSizeOutput(width=scaled_width, height=scaled_height)
--- a/invokeai/app/invocations/image_to_latents.py
+++ b/invokeai/app/invocations/image_to_latents.py
@@ -0,0 +1,125 @@
+from functools import singledispatchmethod
+
+import einops
+import torch
+from diffusers.models.attention_processor import (
+    AttnProcessor2_0,
+    LoRAAttnProcessor2_0,
+    LoRAXFormersAttnProcessor,
+    XFormersAttnProcessor,
+)
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import DEFAULT_PRECISION
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+)
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager import LoadedModel
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+
+
+@invocation(
+    "i2l",
+    title="Image to Latents",
+    tags=["latents", "image", "vae", "i2l"],
+    category="latents",
+    version="1.0.2",
+)
+class ImageToLatentsInvocation(BaseInvocation):
+    """Encodes an image into latents."""
+
+    image: ImageField = InputField(
+        description="The image to encode",
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
+    fp32: bool = InputField(default=DEFAULT_PRECISION == torch.float32, description=FieldDescriptions.fp32)
+
+    @staticmethod
+    def vae_encode(vae_info: LoadedModel, upcast: bool, tiled: bool, image_tensor: torch.Tensor) -> torch.Tensor:
+        """Encode an image tensor into scaled latents using the given VAE.
+
+        Args:
+            vae_info: Loaded VAE handle; entering it as a context manager yields the model.
+            upcast: If True the VAE is cast to float32 before encoding, otherwise to float16.
+            tiled: Whether to enable (True) or disable (False) tiling on the VAE.
+            image_tensor: Image tensor to encode; it is moved to the VAE's device and dtype first.
+
+        Returns:
+            The encoded latents multiplied by `vae.config.scaling_factor`, cast to the dtype the VAE had on entry.
+
+        Note:
+            The dtype and tiling changes made to the VAE are not reverted by this method.
+        """
+        with vae_info as vae:
+            assert isinstance(vae, torch.nn.Module)
+            # Remember the incoming dtype so the returned latents can be cast back to it.
+            orig_dtype = vae.dtype
+            if upcast:
+                vae.to(dtype=torch.float32)
+
+                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
+                    vae.decoder.mid_block.attentions[0].processor,
+                    (
+                        AttnProcessor2_0,
+                        XFormersAttnProcessor,
+                        LoRAXFormersAttnProcessor,
+                        LoRAAttnProcessor2_0,
+                    ),
+                )
+                # if xformers or torch_2_0 is used attention block does not need
+                # to be in float32 which can save lots of memory
+                # NOTE(review): these are decoder-side modules although this method only encodes - this looks
+                # carried over from the decode path; confirm it is intentional.
+                if use_torch_2_0_or_xformers:
+                    vae.post_quant_conv.to(orig_dtype)
+                    vae.decoder.conv_in.to(orig_dtype)
+                    vae.decoder.mid_block.to(orig_dtype)
+                # else:
+                #    latents = latents.float()
+
+            else:
+                vae.to(dtype=torch.float16)
+                # latents = latents.half()
+
+            if tiled:
+                vae.enable_tiling()
+            else:
+                vae.disable_tiling()
+
+            # non_noised_latents_from_image
+            image_tensor = image_tensor.to(device=vae.device, dtype=vae.dtype)
+            with torch.inference_mode():
+                latents = ImageToLatentsInvocation._encode_to_tensor(vae, image_tensor)
+
+            latents = vae.config.scaling_factor * latents
+            latents = latents.to(dtype=orig_dtype)
+
+        return latents
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        """Load the input image, encode it with the VAE and save the resulting latents.
+
+        The image is converted to RGB and turned into a tensor; a 3-D tensor gets a leading batch axis of size 1.
+        The latents are moved to the CPU before being saved, and the output is built with `seed=None`.
+        """
+        image = context.images.get_pil(self.image.image_name)
+
+        vae_info = context.models.load(self.vae.vae)
+
+        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+        if image_tensor.dim() == 3:
+            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
+
+        latents = self.vae_encode(vae_info, self.fp32, self.tiled, image_tensor)
+
+        latents = latents.to("cpu")
+        name = context.tensors.save(tensor=latents)
+        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
+
+    # Single-dispatch on the VAE type: this base implementation is annotated for AutoencoderKL, and the overload
+    # registered below handles AutoencoderTiny.
+    @singledispatchmethod
+    @staticmethod
+    def _encode_to_tensor(vae: AutoencoderKL, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
+        """Encode with a VAE that returns a latent distribution, and draw one sample from it."""
+        assert isinstance(vae, torch.nn.Module)
+        image_tensor_dist = vae.encode(image_tensor).latent_dist
+        latents: torch.Tensor = image_tensor_dist.sample().to(
+            dtype=vae.dtype
+        )  # FIXME: uses torch.randn. make reproducible!
+        return latents
+
+    @_encode_to_tensor.register
+    @staticmethod
+    def _(vae: AutoencoderTiny, image_tensor: torch.FloatTensor) -> torch.FloatTensor:
+        """Encode with an AutoencoderTiny, whose encode() result exposes the latents directly."""
+        assert isinstance(vae, torch.nn.Module)
+        latents: torch.FloatTensor = vae.encode(image_tensor).latents
+        return latents
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
--- a/invokeai/app/invocations/latents_to_image.py
+++ b/invokeai/app/invocations/latents_to_image.py
@@ -0,0 +1,127 @@
+import torch
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.models.attention_processor import (
+    AttnProcessor2_0,
+    LoRAAttnProcessor2_0,
+    LoRAXFormersAttnProcessor,
+    XFormersAttnProcessor,
+)
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
+from PIL import Image
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import DEFAULT_PRECISION
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.load.load_base import LoadedModel
+from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "l2i",
+    title="Latents to Image",
+    tags=["latents", "image", "vae", "l2i"],
+    category="latents",
+    version="1.2.2",
+)
+class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an image from latents."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
+    fp32: bool = InputField(default=DEFAULT_PRECISION == torch.float32, description=FieldDescriptions.fp32)
+
+    @staticmethod
+    def vae_decode(
+        context: InvocationContext,
+        vae_info: LoadedModel,
+        seamless_axes: list[str],
+        latents: torch.Tensor,
+        use_fp32: bool,
+        use_tiling: bool,
+    ) -> Image.Image:
+        """Decode latents into a PIL image using the given VAE.
+
+        Args:
+            context: Invocation context; only its config (`force_tiled_decode`) is read here.
+            vae_info: Loaded VAE handle; must wrap an AutoencoderKL or AutoencoderTiny.
+            seamless_axes: Axes passed to `set_seamless` for the duration of the decode.
+            latents: Latents to decode; they are moved to the VAE's device.
+            use_fp32: If True the VAE is cast to float32, otherwise the VAE and latents are cast to float16.
+            use_tiling: Enable VAE tiling; tiling is also enabled when the config sets `force_tiled_decode`.
+
+        Returns:
+            The first image of the decoded batch.
+        """
+        assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
+        with set_seamless(vae_info.model, seamless_axes), vae_info as vae:
+            assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
+            latents = latents.to(vae.device)
+            if use_fp32:
+                vae.to(dtype=torch.float32)
+
+                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
+                    vae.decoder.mid_block.attentions[0].processor,
+                    (
+                        AttnProcessor2_0,
+                        XFormersAttnProcessor,
+                        LoRAXFormersAttnProcessor,
+                        LoRAAttnProcessor2_0,
+                    ),
+                )
+                # if xformers or torch_2_0 is used attention block does not need
+                # to be in float32 which can save lots of memory
+                if use_torch_2_0_or_xformers:
+                    vae.post_quant_conv.to(latents.dtype)
+                    vae.decoder.conv_in.to(latents.dtype)
+                    vae.decoder.mid_block.to(latents.dtype)
+                else:
+                    latents = latents.float()
+
+            else:
+                vae.to(dtype=torch.float16)
+                latents = latents.half()
+
+            if use_tiling or context.config.get().force_tiled_decode:
+                vae.enable_tiling()
+            else:
+                vae.disable_tiling()
+
+            # clear memory as vae decode can request a lot
+            TorchDevice.empty_cache()
+
+            with torch.inference_mode():
+                # copied from diffusers pipeline
+                latents = latents / vae.config.scaling_factor
+                image = vae.decode(latents, return_dict=False)[0]
+                image = (image / 2 + 0.5).clamp(0, 1)  # denormalize
+                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
+                np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()
+
+                # Only the first image of the batch is kept.
+                image = VaeImageProcessor.numpy_to_pil(np_image)[0]
+
+        TorchDevice.empty_cache()
+
+        return image
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        """Load the latents and VAE, decode them into an image, save the image and return it as the node output."""
+        latents = context.tensors.load(self.latents.latents_name)
+        vae_info = context.models.load(self.vae.vae)
+
+        image = self.vae_decode(
+            context=context,
+            vae_info=vae_info,
+            seamless_axes=self.vae.seamless_axes,
+            latents=latents,
+            use_fp32=self.fp32,
+            use_tiling=self.tiled,
+        )
+        image_dto = context.images.save(image=image)
+
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/resize_latents.py
+++ b/invokeai/app/invocations/resize_latents.py
@@ -0,0 +1,103 @@
+from typing import Literal
+
+import torch
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+)
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.util.devices import TorchDevice
+
+# Interpolation mode names accepted by the `mode` input of the resize/scale nodes below; the selected value is
+# passed unchanged to torch.nn.functional.interpolate.
+LATENTS_INTERPOLATION_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"]
+
+
+@invocation(
+    "lresize",
+    title="Resize Latents",
+    tags=["latents", "resize"],
+    category="latents",
+    version="1.0.2",
+)
+class ResizeLatentsInvocation(BaseInvocation):
+    """Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    width: int = InputField(
+        ge=64,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description=FieldDescriptions.width,
+    )
+    # The height field previously reused the width description; it now uses the height description.
+    height: int = InputField(
+        ge=64,
+        multiple_of=LATENT_SCALE_FACTOR,
+        description=FieldDescriptions.height,
+    )
+    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
+    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        """Interpolate the input latents to the requested size and save the result.
+
+        The pixel-space `width`/`height` are floor-divided by LATENT_SCALE_FACTOR to get the target latent size.
+        The result is moved to the CPU before saving, and the input latents' seed is carried over to the output.
+        """
+        latents = context.tensors.load(self.latents.latents_name)
+        device = TorchDevice.choose_torch_device()
+
+        resized_latents = torch.nn.functional.interpolate(
+            latents.to(device),
+            size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR),
+            mode=self.mode,
+            # The antialias flag is only forwarded for the bilinear and bicubic modes.
+            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
+        )
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        resized_latents = resized_latents.to("cpu")
+
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=resized_latents)
+        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
+
+
+@invocation(
+    "lscale",
+    title="Scale Latents",
+    tags=["latents", "resize"],
+    category="latents",
+    version="1.0.2",
+)
+class ScaleLatentsInvocation(BaseInvocation):
+    """Scales latents by a given factor."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    scale_factor: float = InputField(gt=0, description=FieldDescriptions.scale_factor)
+    mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
+    antialias: bool = InputField(default=False, description=FieldDescriptions.torch_antialias)
+
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        """Interpolate the input latents by `scale_factor` and save the result.
+
+        The result is moved to the CPU before saving, and the input latents' seed is carried over to the output.
+        """
+        latents = context.tensors.load(self.latents.latents_name)
+
+        device = TorchDevice.choose_torch_device()
+
+        # resizing
+        resized_latents = torch.nn.functional.interpolate(
+            latents.to(device),
+            scale_factor=self.scale_factor,
+            mode=self.mode,
+            # The antialias flag is only forwarded for the bilinear and bicubic modes.
+            antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
+        )
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        resized_latents = resized_latents.to("cpu")
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=resized_latents)
+        return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
--- a/invokeai/app/invocations/scheduler.py
+++ b/invokeai/app/invocations/scheduler.py
@@ -0,0 +1,34 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    InputField,
+    OutputField,
+    UIType,
+)
+from invokeai.app.services.shared.invocation_context import InvocationContext
+
+
+@invocation_output("scheduler_output")
+class SchedulerOutput(BaseInvocationOutput):
+    # Output of the Scheduler node: the selected scheduler name, restricted to SCHEDULER_NAME_VALUES.
+    scheduler: SCHEDULER_NAME_VALUES = OutputField(description=FieldDescriptions.scheduler, ui_type=UIType.Scheduler)
+
+
+@invocation(
+    "scheduler",
+    title="Scheduler",
+    tags=["scheduler"],
+    category="latents",
+    version="1.0.0",
+)
+class SchedulerInvocation(BaseInvocation):
+    """Selects a scheduler."""
+
+    scheduler: SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description=FieldDescriptions.scheduler,
+        ui_type=UIType.Scheduler,
+    )
+
+    def invoke(self, context: InvocationContext) -> SchedulerOutput:
+        """Return the selected scheduler name unchanged; `context` is not used."""
+        return SchedulerOutput(scheduler=self.scheduler)
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -0,0 +1,268 @@
+import copy
+from contextlib import ExitStack
+from typing import Iterator, Tuple
+
+import torch
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+from pydantic import field_validator
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
+from invokeai.app.invocations.controlnet_image_processors import ControlField
+from invokeai.app.invocations.denoise_latents import DenoiseLatentsInvocation, get_scheduler
+from invokeai.app.invocations.fields import (
+    ConditioningField,
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+    UIType,
+)
+from invokeai.app.invocations.model import UNetField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.stable_diffusion.diffusers_pipeline import ControlNetData
+from invokeai.backend.stable_diffusion.multi_diffusion_pipeline import (
+    MultiDiffusionPipeline,
+    MultiDiffusionRegionConditioning,
+)
+from invokeai.backend.tiles.tiles import (
+    calc_tiles_min_overlap,
+)
+from invokeai.backend.tiles.utils import TBLR
+from invokeai.backend.util.devices import TorchDevice
+
+
+def crop_controlnet_data(control_data: ControlNetData, latent_region: TBLR) -> ControlNetData:
+    """Return a shallow copy of `control_data` whose image tensor is cropped to `latent_region`.
+
+    The region is expressed in latent-space coordinates, so each edge is multiplied by LATENT_SCALE_FACTOR before
+    it is used to slice the third and fourth axes of the control image tensor. Every other attribute of the copy
+    is shared with the original object.
+    """
+    rows = slice(latent_region.top * LATENT_SCALE_FACTOR, latent_region.bottom * LATENT_SCALE_FACTOR)
+    cols = slice(latent_region.left * LATENT_SCALE_FACTOR, latent_region.right * LATENT_SCALE_FACTOR)
+
+    # Only the reference image needs cropping, so a shallow copy with that one attribute replaced is enough.
+    cropped = copy.copy(control_data)
+    cropped.image_tensor = control_data.image_tensor[:, :, rows, cols]
+    return cropped
+
+
+@invocation(
+    "tiled_multi_diffusion_denoise_latents",
+    title="Tiled Multi-Diffusion Denoise Latents",
+    tags=["upscale", "denoise"],
+    category="latents",
+    # TODO(ryand): Reset to 1.0.0 right before release.
+    version="1.0.0",
+)
+class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
+    """Tiled Multi-Diffusion denoising.
+
+    This node handles automatically tiling the input image. Future iterations of
+    this node should allow the user to specify custom regions with different parameters for each region to harness the
+    full power of Multi-Diffusion.
+
+    This node has a similar interface to the `DenoiseLatents` node, but it has a reduced feature set (no IP-Adapter,
+    T2I-Adapter, masking, etc.).
+    """
+
+    positive_conditioning: ConditioningField = InputField(
+        description=FieldDescriptions.positive_cond, input=Input.Connection
+    )
+    negative_conditioning: ConditioningField = InputField(
+        description=FieldDescriptions.negative_cond, input=Input.Connection
+    )
+    noise: LatentsField | None = InputField(
+        default=None,
+        description=FieldDescriptions.noise,
+        input=Input.Connection,
+    )
+    latents: LatentsField | None = InputField(
+        default=None,
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    # TODO(ryand): Add multiple-of validation.
+    # TODO(ryand): Smaller defaults might make more sense.
+    tile_height: int = InputField(default=112, gt=0, description="Height of the tiles in latent space.")
+    tile_width: int = InputField(default=112, gt=0, description="Width of the tiles in latent space.")
+    tile_min_overlap: int = InputField(
+        default=16,
+        gt=0,
+        description="The minimum overlap between adjacent tiles in latent space. The actual overlap may be larger than "
+        "this to evenly cover the entire image.",
+    )
+    steps: int = InputField(default=18, gt=0, description=FieldDescriptions.steps)
+    cfg_scale: float | list[float] = InputField(default=6.0, description=FieldDescriptions.cfg_scale, title="CFG Scale")
+    # TODO(ryand): The default here should probably be 0.0.
+    denoising_start: float = InputField(
+        default=0.65,
+        ge=0,
+        le=1,
+        description=FieldDescriptions.denoising_start,
+    )
+    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
+    scheduler: SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description=FieldDescriptions.scheduler,
+        ui_type=UIType.Scheduler,
+    )
+    unet: UNetField = InputField(
+        description=FieldDescriptions.unet,
+        input=Input.Connection,
+        title="UNet",
+    )
+    cfg_rescale_multiplier: float = InputField(
+        title="CFG Rescale Multiplier", default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier
+    )
+    control: ControlField | list[ControlField] | None = InputField(
+        default=None,
+        input=Input.Connection,
+    )
+
+    @field_validator("cfg_scale")
+    @classmethod
+    def ge_one(cls, v: list[float] | float) -> list[float] | float:
+        """Validate that all cfg_scale values are >= 1"""
+        # Treat a scalar as a one-element list so both input forms share a single check.
+        values = v if isinstance(v, list) else [v]
+        if any(value < 1 for value in values):
+            # The check accepts exactly 1, so the message says "or equal".
+            raise ValueError("cfg_scale must be greater than or equal to 1")
+        return v
+
+    @staticmethod
+    def create_pipeline(
+        unet: UNet2DConditionModel,
+        scheduler: SchedulerMixin,
+    ) -> MultiDiffusionPipeline:
+        """Build a MultiDiffusionPipeline around the given UNet and scheduler.
+
+        No real VAE, text encoder, tokenizer, safety checker or feature extractor is supplied; the VAE slot is
+        filled with a stub object that only exposes `config.block_out_channels`.
+        """
+
+        # TODO(ryand): Get rid of this FakeVae hack.
+        class FakeVae:
+            class FakeVaeConfig:
+                def __init__(self) -> None:
+                    self.block_out_channels = [0]
+
+            def __init__(self) -> None:
+                self.config = FakeVae.FakeVaeConfig()
+
+        return MultiDiffusionPipeline(
+            vae=FakeVae(),  # TODO: oh...
+            text_encoder=None,
+            tokenizer=None,
+            unet=unet,
+            scheduler=scheduler,
+            safety_checker=None,
+            feature_extractor=None,
+            requires_safety_checker=False,
+        )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        """Denoise the input latents tile by tile with Multi-Diffusion and save the result.
+
+        Steps: prepare noise/latents, compute the tile layout over the latent image, load the UNet with its LoRAs
+        applied, build the text conditioning (shared by all tiles) and the per-tile cropped ControlNet data,
+        initialise the scheduler, then run `multi_diffusion_denoise`. The result is moved to the CPU before being
+        saved, and the output is built with `seed=None`.
+        """
+        seed, noise, latents = DenoiseLatentsInvocation.prepare_noise_and_latents(context, self.noise, self.latents)
+        _, _, latent_height, latent_width = latents.shape
+
+        # Calculate the tile locations to cover the latent-space image.
+        # TODO(ryand): Add constraints on the tile params. Is there a multiple-of constraint?
+        tiles = calc_tiles_min_overlap(
+            image_height=latent_height,
+            image_width=latent_width,
+            tile_height=self.tile_height,
+            tile_width=self.tile_width,
+            min_overlap=self.tile_min_overlap,
+        )
+
+        # Prepare an iterator that yields the UNet's LoRA models and their weights.
+        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+            for lora in self.unet.loras:
+                lora_info = context.models.load(lora.lora)
+                assert isinstance(lora_info.model, LoRAModelRaw)
+                yield (lora_info.model, lora.weight)
+                del lora_info
+
+        # Load the UNet model.
+        unet_info = context.models.load(self.unet.unet)
+
+        with ExitStack() as exit_stack, unet_info as unet, ModelPatcher.apply_lora_unet(unet, _lora_loader()):
+            assert isinstance(unet, UNet2DConditionModel)
+            latents = latents.to(device=unet.device, dtype=unet.dtype)
+            if noise is not None:
+                noise = noise.to(device=unet.device, dtype=unet.dtype)
+            scheduler = get_scheduler(
+                context=context,
+                scheduler_info=self.unet.scheduler,
+                scheduler_name=self.scheduler,
+                seed=seed,
+            )
+            pipeline = self.create_pipeline(unet=unet, scheduler=scheduler)
+
+            # Prepare the prompt conditioning data. The same prompt conditioning is applied to all tiles.
+            # NOTE(review): the conditioning is sized for one tile rather than the full latent image - presumably
+            # because the UNet only sees one tile at a time; confirm.
+            conditioning_data = DenoiseLatentsInvocation.get_conditioning_data(
+                context=context,
+                positive_conditioning_field=self.positive_conditioning,
+                negative_conditioning_field=self.negative_conditioning,
+                unet=unet,
+                latent_height=self.tile_height,
+                latent_width=self.tile_width,
+                cfg_scale=self.cfg_scale,
+                steps=self.steps,
+                cfg_rescale_multiplier=self.cfg_rescale_multiplier,
+            )
+
+            controlnet_data = DenoiseLatentsInvocation.prep_control_data(
+                context=context,
+                control_input=self.control,
+                latents_shape=list(latents.shape),
+                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                do_classifier_free_guidance=True,
+                exit_stack=exit_stack,
+            )
+
+            # Split the controlnet_data into tiles.
+            # controlnet_data_tiles[t][c] is the c'th control data for the t'th tile.
+            controlnet_data_tiles: list[list[ControlNetData]] = []
+            for tile in tiles:
+                # `controlnet_data or []` yields an empty list per tile when no ControlNet data was prepared.
+                tile_controlnet_data = [crop_controlnet_data(cn, tile.coords) for cn in controlnet_data or []]
+                controlnet_data_tiles.append(tile_controlnet_data)
+
+            # Prepare the MultiDiffusionRegionConditioning list.
+            multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning] = []
+            for tile, tile_controlnet_data in zip(tiles, controlnet_data_tiles, strict=True):
+                multi_diffusion_conditioning.append(
+                    MultiDiffusionRegionConditioning(
+                        region=tile.coords,
+                        text_conditioning_data=conditioning_data,
+                        control_data=tile_controlnet_data,
+                    )
+                )
+
+            timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
+                scheduler,
+                device=unet.device,
+                steps=self.steps,
+                denoising_start=self.denoising_start,
+                denoising_end=self.denoising_end,
+                seed=seed,
+            )
+
+            # Run Multi-Diffusion denoising.
+            result_latents = pipeline.multi_diffusion_denoise(
+                multi_diffusion_conditioning=multi_diffusion_conditioning,
+                latents=latents,
+                scheduler_step_kwargs=scheduler_step_kwargs,
+                noise=noise,
+                timesteps=timesteps,
+                init_timestep=init_timestep,
+                # TODO(ryand): Add proper callback.
+                callback=lambda x: None,
+            )
+
+        # TODO(ryand): I copied this from DenoiseLatentsInvocation. I'm not sure if it's actually important.
+        result_latents = result_latents.to("cpu")
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=result_latents)
+        return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
--- a/invokeai/app/invocations/tiled_stable_diffusion_refine.py
+++ b/invokeai/app/invocations/tiled_stable_diffusion_refine.py
@@ -0,0 +1,380 @@
+from contextlib import ExitStack
+from typing import Iterator, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import torch
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from PIL import Image
+from pydantic import field_validator
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.constants import DEFAULT_PRECISION, LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
+from invokeai.app.invocations.denoise_latents import DenoiseLatentsInvocation, get_scheduler
+from invokeai.app.invocations.fields import (
+    ConditioningField,
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+    UIType,
+)
+from invokeai.app.invocations.image_to_latents import ImageToLatentsInvocation
+from invokeai.app.invocations.latents_to_image import LatentsToImageInvocation
+from invokeai.app.invocations.model import ModelIdentifierField, UNetField, VAEField
+from invokeai.app.invocations.noise import get_noise
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, prepare_control_image
+from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.stable_diffusion.diffusers_pipeline import ControlNetData, image_resized_to_grid_as_tensor
+from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending
+from invokeai.backend.tiles.utils import Tile
+from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.hotfixes import ControlNetModel
+
+
+@invocation(
+    "tiled_stable_diffusion_refine",
+    title="Tiled Stable Diffusion Refine",
+    tags=["upscale", "denoise"],
+    category="latents",
+    version="1.0.0",
+)
+class TiledStableDiffusionRefineInvocation(BaseInvocation):
+    """A tiled Stable Diffusion pipeline for refining high resolution images. This invocation is intended to be used to
+    refine an image after upscaling i.e. it is the second step in a typical "tiled upscaling" workflow.
+    """
+
+    image: ImageField = InputField(description="Image to be refined.")
+
+    positive_conditioning: ConditioningField = InputField(
+        description=FieldDescriptions.positive_cond, input=Input.Connection
+    )
+    negative_conditioning: ConditioningField = InputField(
+        description=FieldDescriptions.negative_cond, input=Input.Connection
+    )
+    # TODO(ryand): Add multiple-of validation.
+    tile_height: int = InputField(default=512, gt=0, description="Height of the tiles.")
+    tile_width: int = InputField(default=512, gt=0, description="Width of the tiles.")
+    tile_overlap: int = InputField(
+        default=16,
+        gt=0,
+        description="Target overlap between adjacent tiles (the last row/column may overlap more than this).",
+    )
+    steps: int = InputField(default=18, gt=0, description=FieldDescriptions.steps)
+    cfg_scale: float | list[float] = InputField(default=6.0, description=FieldDescriptions.cfg_scale, title="CFG Scale")
+    denoising_start: float = InputField(
+        default=0.65,
+        ge=0,
+        le=1,
+        description=FieldDescriptions.denoising_start,
+    )
+    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
+    scheduler: SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description=FieldDescriptions.scheduler,
+        ui_type=UIType.Scheduler,
+    )
+    unet: UNetField = InputField(
+        description=FieldDescriptions.unet,
+        input=Input.Connection,
+        title="UNet",
+    )
+    cfg_rescale_multiplier: float = InputField(
+        title="CFG Rescale Multiplier", default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+    vae_fp32: bool = InputField(
+        default=DEFAULT_PRECISION == torch.float32, description="Whether to use float32 precision when running the VAE."
+    )
+    # HACK(ryand): We probably want to allow the user to control all of the parameters in ControlField. But, awkwardly,
+    # we don't want to use the image field. Figure out how best to handle this.
+    # TODO(ryand): Currently, there is no ControlNet preprocessor applied to the tile images. In other words, we pretty
+    # much assume that it is a tile ControlNet. We need to decide how we want to handle this. E.g. find a way to support
+    # CN preprocessors, raise a clear warning when a non-tile CN model is selected, hardcode the supported CN models,
+    # etc.
+    control_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.controlnet_model, ui_type=UIType.ControlNetModel
+    )
+    control_weight: float = InputField(default=0.6)
+
+    @field_validator("cfg_scale")
+    def ge_one(cls, v: list[float] | float) -> list[float] | float:
+        """Validate that every cfg_scale value (a scalar or each entry of a list) is >= 1.
+
+        Returns `v` unchanged when valid and raises ValueError otherwise. A value of exactly 1 is accepted, and
+        an empty list passes because it contains nothing to reject.
+        """
+        values = v if isinstance(v, list) else [v]
+        if any(value < 1 for value in values):
+            raise ValueError("cfg_scale must be greater than or equal to 1")
+        return v
+
+    @staticmethod
+    def crop_latents_to_tile(latents: torch.Tensor, image_tile: Tile) -> torch.Tensor:
+        """Return the slice of `latents` that covers the image-space region of `image_tile`.
+        Raises ValueError unless every tile coordinate is a multiple of LATENT_SCALE_FACTOR.
+        """
+        coords = image_tile.coords
+        for coord in (coords.top, coords.left, coords.right, coords.bottom):
+            if coord % LATENT_SCALE_FACTOR != 0:
+                raise ValueError(
+                    f"The tile coordinates must all be divisible by the latent scale factor"
+                    f" ({LATENT_SCALE_FACTOR}). {image_tile.coords=}."
+                )
+        assert latents.dim() == 4  # We expect: (batch_size, channels, height, width).
+
+        # Convert the image-space bounds to latent-space bounds.
+        top, bottom = coords.top // LATENT_SCALE_FACTOR, coords.bottom // LATENT_SCALE_FACTOR
+        left, right = coords.left // LATENT_SCALE_FACTOR, coords.right // LATENT_SCALE_FACTOR
+        return latents[..., top:bottom, left:right]
+
+    def run_controlnet(
+        self,
+        image: Image.Image,
+        controlnet_model: ControlNetModel,
+        weight: float,
+        do_classifier_free_guidance: bool,
+        width: int,
+        height: int,
+        device: torch.device,
+        dtype: torch.dtype,
+        control_mode: CONTROLNET_MODE_VALUES = "balanced",
+        resize_mode: CONTROLNET_RESIZE_VALUES = "just_resize_simple",
+    ) -> ControlNetData:
+        """Prepare the control image for `image` and wrap it in a ControlNetData (step percent 0.0 to 1.0)."""
+        # resize_mode is also stored on ControlNetData in case it is needed in the future, even though any resizing
+        # needed should currently be happening in prepare_control_image().
+        return ControlNetData(
+            model=controlnet_model,
+            image_tensor=prepare_control_image(
+                image=image,
+                do_classifier_free_guidance=do_classifier_free_guidance,
+                width=width,
+                height=height,
+                device=device,
+                dtype=dtype,
+                control_mode=control_mode,
+                resize_mode=resize_mode,
+            ),
+            weight=weight,
+            begin_step_percent=0.0,
+            end_step_percent=1.0,
+            control_mode=control_mode,
+            resize_mode=resize_mode,
+        )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        """Refine the input image tile-by-tile and return the merged result.
+
+        The image is split into overlapping tiles. Each tile is VAE-encoded, denoised with the UNet (with its LoRAs
+        applied) under a ControlNet that is fed that same tile, and VAE-decoded. The refined tiles are then merged
+        with linear blending and the merged image is saved through `context.images`.
+        """
+        # TODO(ryand): Expose the seed parameter.
+        seed = 0
+
+        # Load the input image.
+        input_image = context.images.get_pil(self.image.image_name)
+
+        # Calculate the tile locations to cover the image.
+        # We have selected this tiling strategy to make it easy to achieve tile coords that are multiples of 8. This
+        # facilitates conversions between image space and latent space.
+        # TODO(ryand): Expose these tiling parameters. (Keep in mind the multiple-of constraints on these params.)
+        tiles = calc_tiles_with_overlap(
+            image_height=input_image.height,
+            image_width=input_image.width,
+            tile_height=self.tile_height,
+            tile_width=self.tile_width,
+            overlap=self.tile_overlap,
+        )
+
+        # Convert the input image to a torch.Tensor.
+        input_image_torch = image_resized_to_grid_as_tensor(input_image.convert("RGB"), multiple_of=LATENT_SCALE_FACTOR)
+        input_image_torch = input_image_torch.unsqueeze(0)  # Add a batch dimension.
+        # Validate our assumptions about the shape of input_image_torch.
+        assert input_image_torch.dim() == 4  # We expect: (batch_size, channels, height, width).
+        assert input_image_torch.shape[:2] == (1, 3)
+
+        # Split the input image into tiles in torch.Tensor format.
+        image_tiles_torch: list[torch.Tensor] = []
+        for tile in tiles:
+            image_tile = input_image_torch[
+                :,
+                :,
+                tile.coords.top : tile.coords.bottom,
+                tile.coords.left : tile.coords.right,
+            ]
+            image_tiles_torch.append(image_tile)
+
+        # Split the input image into tiles in numpy format.
+        # TODO(ryand): We currently maintain both np.ndarray and torch.Tensor tiles. Ideally, all operations should work
+        # with torch.Tensor tiles.
+        # Apply the same RGB conversion as the torch path above so the array is always (height, width, 3). Without it,
+        # a non-RGB stored image (e.g. grayscale) yields a 2D array and the 3-index slicing below fails.
+        input_image_np = np.array(input_image.convert("RGB"))
+        image_tiles_np: list[npt.NDArray[np.uint8]] = []
+        for tile in tiles:
+            image_tile_np = input_image_np[
+                tile.coords.top : tile.coords.bottom,
+                tile.coords.left : tile.coords.right,
+                :,
+            ]
+            image_tiles_np.append(image_tile_np)
+
+        # VAE-encode each image tile independently.
+        # TODO(ryand): Is there any advantage to VAE-encoding the entire image before splitting it into tiles? What
+        # about for decoding?
+        vae_info = context.models.load(self.vae.vae)
+        latent_tiles: list[torch.Tensor] = []
+        for image_tile_torch in image_tiles_torch:
+            latent_tiles.append(
+                ImageToLatentsInvocation.vae_encode(
+                    vae_info=vae_info, upcast=self.vae_fp32, tiled=False, image_tensor=image_tile_torch
+                )
+            )
+
+        # Generate noise with dimensions corresponding to the full image in latent space.
+        # It is important that the noise tensor is generated at the full image dimension and then tiled, rather than
+        # generating for each tile independently. This ensures that overlapping regions between tiles use the same
+        # noise.
+        assert input_image_torch.shape[2] % LATENT_SCALE_FACTOR == 0
+        assert input_image_torch.shape[3] % LATENT_SCALE_FACTOR == 0
+        global_noise = get_noise(
+            width=input_image_torch.shape[3],
+            height=input_image_torch.shape[2],
+            device=TorchDevice.choose_torch_device(),
+            seed=seed,
+            downsampling_factor=LATENT_SCALE_FACTOR,
+            use_cpu=True,
+        )
+
+        # Crop the global noise into tiles.
+        noise_tiles = [self.crop_latents_to_tile(latents=global_noise, image_tile=t) for t in tiles]
+
+        # Prepare an iterator that yields the UNet's LoRA models and their weights.
+        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+            for lora in self.unet.loras:
+                lora_info = context.models.load(lora.lora)
+                assert isinstance(lora_info.model, LoRAModelRaw)
+                yield (lora_info.model, lora.weight)
+                del lora_info
+
+        # Load the UNet model.
+        unet_info = context.models.load(self.unet.unet)
+
+        refined_latent_tiles: list[torch.Tensor] = []
+        with ExitStack() as exit_stack, unet_info as unet, ModelPatcher.apply_lora_unet(unet, _lora_loader()):
+            assert isinstance(unet, UNet2DConditionModel)
+            scheduler = get_scheduler(
+                context=context,
+                scheduler_info=self.unet.scheduler,
+                scheduler_name=self.scheduler,
+                seed=seed,
+            )
+            pipeline = DenoiseLatentsInvocation.create_pipeline(unet=unet, scheduler=scheduler)
+
+            # Prepare the prompt conditioning data. The same prompt conditioning is applied to all tiles.
+            # Assume that all tiles have the same shape.
+            _, _, latent_height, latent_width = latent_tiles[0].shape
+            conditioning_data = DenoiseLatentsInvocation.get_conditioning_data(
+                context=context,
+                positive_conditioning_field=self.positive_conditioning,
+                negative_conditioning_field=self.negative_conditioning,
+                unet=unet,
+                latent_height=latent_height,
+                latent_width=latent_width,
+                cfg_scale=self.cfg_scale,
+                steps=self.steps,
+                cfg_rescale_multiplier=self.cfg_rescale_multiplier,
+            )
+
+            # Load the ControlNet model.
+            # TODO(ryand): Support multiple ControlNet models.
+            controlnet_model = exit_stack.enter_context(context.models.load(self.control_model))
+            assert isinstance(controlnet_model, ControlNetModel)
+
+            # Denoise (i.e. "refine") each tile independently.
+            for image_tile_np, latent_tile, noise_tile in zip(image_tiles_np, latent_tiles, noise_tiles, strict=True):
+                assert latent_tile.shape == noise_tile.shape
+
+                # Prepare a PIL Image for ControlNet processing.
+                # TODO(ryand): This is a bit awkward that we have to prepare both torch.Tensor and PIL.Image versions of
+                # the tiles. Ideally, the ControlNet code should be able to work with Tensors.
+                image_tile_pil = Image.fromarray(image_tile_np)
+
+                # Run the ControlNet on the image tile.
+                height, width, _ = image_tile_np.shape
+                # The height and width must be evenly divisible by LATENT_SCALE_FACTOR. This is enforced earlier, but we
+                # validate this assumption here.
+                assert height % LATENT_SCALE_FACTOR == 0
+                assert width % LATENT_SCALE_FACTOR == 0
+                controlnet_data = self.run_controlnet(
+                    image=image_tile_pil,
+                    controlnet_model=controlnet_model,
+                    weight=self.control_weight,
+                    do_classifier_free_guidance=True,
+                    width=width,
+                    height=height,
+                    device=controlnet_model.device,
+                    dtype=controlnet_model.dtype,
+                    control_mode="balanced",
+                    resize_mode="just_resize_simple",
+                )
+
+                timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
+                    scheduler,
+                    device=unet.device,
+                    steps=self.steps,
+                    denoising_start=self.denoising_start,
+                    denoising_end=self.denoising_end,
+                    seed=seed,
+                )
+
+                # TODO(ryand): Think about when/if latents/noise should be moved off of the device to save VRAM.
+                latent_tile = latent_tile.to(device=unet.device, dtype=unet.dtype)
+                noise_tile = noise_tile.to(device=unet.device, dtype=unet.dtype)
+                refined_latent_tile = pipeline.latents_from_embeddings(
+                    latents=latent_tile,
+                    timesteps=timesteps,
+                    init_timestep=init_timestep,
+                    noise=noise_tile,
+                    seed=seed,
+                    mask=None,
+                    masked_latents=None,
+                    scheduler_step_kwargs=scheduler_step_kwargs,
+                    conditioning_data=conditioning_data,
+                    control_data=[controlnet_data],
+                    ip_adapter_data=None,
+                    t2i_adapter_data=None,
+                    callback=lambda x: None,
+                )
+                refined_latent_tiles.append(refined_latent_tile)
+
+        # VAE-decode each refined latent tile independently.
+        refined_image_tiles: list[Image.Image] = []
+        for refined_latent_tile in refined_latent_tiles:
+            refined_image_tile = LatentsToImageInvocation.vae_decode(
+                context=context,
+                vae_info=vae_info,
+                seamless_axes=self.vae.seamless_axes,
+                latents=refined_latent_tile,
+                use_fp32=self.vae_fp32,
+                use_tiling=False,
+            )
+            refined_image_tiles.append(refined_image_tile)
+
+        # TODO(ryand): I copied this from DenoiseLatentsInvocation. I'm not sure if it's actually important.
+        TorchDevice.empty_cache()
+
+        # Merge the refined image tiles back into a single image.
+        refined_image_tiles_np = [np.array(t) for t in refined_image_tiles]
+        merged_image_np = np.zeros(shape=(input_image.height, input_image.width, 3), dtype=np.uint8)
+        # TODO(ryand): Tune the blend_amount. Should this be exposed as a parameter?
+        merge_tiles_with_linear_blending(
+            dst_image=merged_image_np, tiles=tiles, tile_images=refined_image_tiles_np, blend_amount=self.tile_overlap
+        )
+
+        # Save the refined image and return its reference.
+        merged_image_pil = Image.fromarray(merged_image_np)
+        image_dto = context.images.save(image=merged_image_pil)
+
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/services/download/download_default.py
+++ b/invokeai/app/services/download/download_default.py
@@ -8,7 +8,7 @@ import time
 import traceback
 from pathlib import Path
 from queue import Empty, PriorityQueue
-from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set
+from typing import Any, Dict, List, Literal, Optional, Set

 import requests
 from pydantic.networks import AnyHttpUrl
@@ -34,9 +34,6 @@ from .download_base import (
    UnknownJobIDException,
 )

-if TYPE_CHECKING:
-    from invokeai.app.services.events.events_base import EventServiceBase
-
 # Maximum number of bytes to download during each call to requests.iter_content()
 DOWNLOAD_CHUNK_SIZE = 100000

--- a/invokeai/app/services/events/events_base.py
+++ b/invokeai/app/services/events/events_base.py
@@ -22,6 +22,7 @@ from invokeai.app.services.events.events_common import (
    ModelInstallCompleteEvent,
    ModelInstallDownloadProgressEvent,
    ModelInstallDownloadsCompleteEvent,
+    ModelInstallDownloadStartedEvent,
    ModelInstallErrorEvent,
    ModelInstallStartedEvent,
    ModelLoadCompleteEvent,
@@ -34,7 +35,6 @@ from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineInterme
 if TYPE_CHECKING:
    from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput
    from invokeai.app.services.download.download_base import DownloadJob
-    from invokeai.app.services.events.events_common import EventBase
    from invokeai.app.services.model_install.model_install_common import ModelInstallJob
    from invokeai.app.services.session_processor.session_processor_common import ProgressImage
    from invokeai.app.services.session_queue.session_queue_common import (
@@ -145,6 +145,10 @@ class EventServiceBase:

    # region Model install

+    def emit_model_install_download_started(self, job: "ModelInstallJob") -> None:
+        """Emitted when the downloads for an install job have started (remote models only)."""
+        self.dispatch(ModelInstallDownloadStartedEvent.build(job))
+
    def emit_model_install_download_progress(self, job: "ModelInstallJob") -> None:
        """Emitted at intervals while the install job is in progress (remote models only)."""
        self.dispatch(ModelInstallDownloadProgressEvent.build(job))
--- a/invokeai/app/services/events/events_common.py
+++ b/invokeai/app/services/events/events_common.py
@@ -417,6 +417,42 @@ class ModelLoadCompleteEvent(ModelEventBase):
        return cls(config=config, submodel_type=submodel_type)


+@payload_schema.register
+class ModelInstallDownloadStartedEvent(ModelEventBase):
+    """Event model for model_install_download_started"""
+
+    __event_name__ = "model_install_download_started"
+
+    id: int = Field(description="The ID of the install job")
+    source: str = Field(description="Source of the model; local path, repo_id or url")
+    local_path: str = Field(description="Where model is downloading to")
+    bytes: int = Field(description="Number of bytes downloaded so far")
+    total_bytes: int = Field(description="Total size of download, including all files")
+    parts: list[dict[str, int | str]] = Field(
+        description="Progress of downloading URLs that comprise the model, if any"
+    )
+
+    @classmethod
+    def build(cls, job: "ModelInstallJob") -> "ModelInstallDownloadStartedEvent":
+        """Create the event from `job`, with one progress entry per item in `job.download_parts`."""
+        parts: list[dict[str, str | int]] = []
+        for part in job.download_parts:
+            part_info: dict[str, str | int] = {
+                "url": str(part.source),
+                "local_path": str(part.download_path),
+                "bytes": part.bytes,
+                "total_bytes": part.total_bytes,
+            }
+            parts.append(part_info)
+
+        return cls(
+            id=job.id,
+            source=str(job.source),
+            local_path=job.local_path.as_posix(),
+            parts=parts,
+            bytes=job.bytes,
+            total_bytes=job.total_bytes,
+        )
+
+
@payload_schema.register
 class ModelInstallDownloadProgressEvent(ModelEventBase):
    """Event model for model_install_download_progress"""
--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@@ -9,7 +9,7 @@ from pathlib import Path
 from queue import Empty, Queue
 from shutil import copyfile, copytree, move, rmtree
 from tempfile import mkdtemp
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union

 import torch
 import yaml
@@ -60,9 +60,6 @@ from .model_install_common import (

 TMPDIR_PREFIX = "tmpinstall_"

-if TYPE_CHECKING:
-    from invokeai.app.services.events.events_base import EventServiceBase
-

 class ModelInstallService(ModelInstallServiceBase):
    """class for InvokeAI model installation."""
@@ -412,11 +409,14 @@ class ModelInstallService(ModelInstallServiceBase):
        if isinstance(source, HFModelSource):
            metadata = HuggingFaceMetadataFetch(self._session).from_id(source.repo_id, source.variant)
            assert isinstance(metadata, ModelMetadataWithFiles)
-            return metadata.download_urls(
-                variant=source.variant or self._guess_variant(),
-                subfolder=source.subfolder,
-                session=self._session,
-            ), metadata
+            return (
+                metadata.download_urls(
+                    variant=source.variant or self._guess_variant(),
+                    subfolder=source.subfolder,
+                    session=self._session,
+                ),
+                metadata,
+            )

        if isinstance(source, URLModelSource):
            try:
@@ -822,7 +822,7 @@ class ModelInstallService(ModelInstallServiceBase):
                install_job.download_parts = download_job.download_parts
                install_job.bytes = sum(x.bytes for x in download_job.download_parts)
                install_job.total_bytes = download_job.total_bytes
-                self._signal_job_downloading(install_job)
+                self._signal_job_download_started(install_job)

    def _download_progress_callback(self, download_job: MultiFileDownloadJob) -> None:
        with self._lock:
@@ -874,6 +874,13 @@ class ModelInstallService(ModelInstallServiceBase):
        if self._event_bus:
            self._event_bus.emit_model_install_started(job)

+    def _signal_job_download_started(self, job: ModelInstallJob) -> None:
+        """Emit the download-started event for `job`; does nothing when no event bus is set."""
+        if not self._event_bus:
+            return
+        # The event payload reads these values, so they must be populated before emitting.
+        assert job._multifile_job is not None
+        assert job.bytes is not None
+        assert job.total_bytes is not None
+        self._event_bus.emit_model_install_download_started(job)
+
    def _signal_job_downloading(self, job: ModelInstallJob) -> None:
        if self._event_bus:
            assert job._multifile_job is not None
--- a/invokeai/app/util/controlnet_utils.py
+++ b/invokeai/app/util/controlnet_utils.py
@@ -289,7 +289,7 @@ def prepare_control_image(
    width: int,
    height: int,
    num_channels: int = 3,
-    device: str = "cuda",
+    device: str | torch.device = "cuda",
    dtype: torch.dtype = torch.float16,
    control_mode: CONTROLNET_MODE_VALUES = "balanced",
    resize_mode: CONTROLNET_RESIZE_VALUES = "just_resize_simple",
@@ -304,7 +304,7 @@ def prepare_control_image(
        num_channels (int, optional): The target number of image channels. This is achieved by converting the input
            image to RGB, then naively taking the first `num_channels` channels. The primary use case is converting a
            RGB image to a single-channel grayscale image. Raises if `num_channels` cannot be achieved. Defaults to 3.
-        device (str, optional): The target device for the output image. Defaults to "cuda".
+        device (str | torch.device, optional): The target device for the output image. Defaults to "cuda".
        dtype (_type_, optional): The dtype for the output image. Defaults to torch.float16.
        do_classifier_free_guidance (bool, optional): If True, repeat the output image along the batch dimension.
            Defaults to True.
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -125,13 +125,16 @@ class IPAdapter(RawModel):
            self.device, dtype=self.dtype
        )

-    def to(self, device: torch.device, dtype: Optional[torch.dtype] = None):
-        self.device = device
+    def to(
+        self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, non_blocking: bool = False
+    ) -> None:
+        """Update the stored device/dtype (when given) and move the adapter's sub-models there.
+
+        An argument left as None keeps the current `self.device` / `self.dtype`. `non_blocking` is passed through
+        to the `.to()` calls on `_image_proj_model` and `attn_weights`.
+        """
+        if device is not None:
+            self.device = device
        if dtype is not None:
            self.dtype = dtype

-        self._image_proj_model.to(device=self.device, dtype=self.dtype)
-        self.attn_weights.to(device=self.device, dtype=self.dtype)
+        self._image_proj_model.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
+        self.attn_weights.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)

    def calc_size(self):
        # workaround for circular import
--- a/invokeai/backend/lora.py
+++ b/invokeai/backend/lora.py
@@ -61,9 +61,10 @@ class LoRALayerBase:
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ) -> None:
        if self.bias is not None:
-            self.bias = self.bias.to(device=device, dtype=dtype)
+            self.bias = self.bias.to(device=device, dtype=dtype, non_blocking=non_blocking)


 # TODO: find and debug lora/locon with bias
@@ -109,14 +110,15 @@ class LoRALayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ) -> None:
-        super().to(device=device, dtype=dtype)
+        super().to(device=device, dtype=dtype, non_blocking=non_blocking)

-        self.up = self.up.to(device=device, dtype=dtype)
-        self.down = self.down.to(device=device, dtype=dtype)
+        self.up = self.up.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.down = self.down.to(device=device, dtype=dtype, non_blocking=non_blocking)

        if self.mid is not None:
-            self.mid = self.mid.to(device=device, dtype=dtype)
+            self.mid = self.mid.to(device=device, dtype=dtype, non_blocking=non_blocking)


 class LoHALayer(LoRALayerBase):
@@ -169,18 +171,19 @@ class LoHALayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)

-        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
-        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
        if self.t1 is not None:
-            self.t1 = self.t1.to(device=device, dtype=dtype)
+            self.t1 = self.t1.to(device=device, dtype=dtype, non_blocking=non_blocking)

-        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
-        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype)
+            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)


 class LoKRLayer(LoRALayerBase):
@@ -265,6 +268,7 @@ class LoKRLayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)

@@ -273,19 +277,19 @@ class LoKRLayer(LoRALayerBase):
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
-            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
-            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)

        if self.w2 is not None:
-            self.w2 = self.w2.to(device=device, dtype=dtype)
+            self.w2 = self.w2.to(device=device, dtype=dtype, non_blocking=non_blocking)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
-            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
-            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)

        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype)
+            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)


 class FullLayer(LoRALayerBase):
@@ -319,10 +323,11 @@ class FullLayer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ) -> None:
        super().to(device=device, dtype=dtype)

-        self.weight = self.weight.to(device=device, dtype=dtype)
+        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)


 class IA3Layer(LoRALayerBase):
@@ -358,11 +363,12 @@ class IA3Layer(LoRALayerBase):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ):
        super().to(device=device, dtype=dtype)

-        self.weight = self.weight.to(device=device, dtype=dtype)
-        self.on_input = self.on_input.to(device=device, dtype=dtype)
+        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.on_input = self.on_input.to(device=device, dtype=dtype, non_blocking=non_blocking)


 AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
@@ -388,10 +394,11 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
    ) -> None:
        # TODO: try revert if exception?
        for _key, layer in self.layers.items():
-            layer.to(device=device, dtype=dtype)
+            layer.to(device=device, dtype=dtype, non_blocking=non_blocking)

    def calc_size(self) -> int:
        model_size = 0
@@ -514,7 +521,7 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
            # lower memory consumption by removing already parsed layer values
            state_dict[layer_key].clear()

-            layer.to(device=device, dtype=dtype)
+            layer.to(device=device, dtype=dtype, non_blocking=True)
            model.layers[layer_key] = layer

        return model
--- a/invokeai/backend/model_hash/hash_validator.py
+++ b/invokeai/backend/model_hash/hash_validator.py
@@ -0,0 +1,24 @@
+import json
+from base64 import b64decode
+
+
+def validate_hash(hash: str) -> None:
+    """Raise Exception if `hash` ("<alg>:<digest>") matches a digest recorded in `hashes`; otherwise return None."""
+    # partition() splits on the first ":" only, so additional colons cannot break tuple unpacking.
+    alg, sep, digest = hash.partition(":")
+    if not sep:
+        return
+    # The "blake3" algorithm name is looked up under the "blake3_single" key of each decoded entry.
+    alg = "blake3_single" if alg == "blake3" else alg
+    for enc_hash in hashes:
+        if json.loads(b64decode(enc_hash)).get(alg) == digest:
+            raise Exception("Unrecoverable Model Error")
+
+
+hashes: list[str] = [
+    "eyJibGFrZTNfbXVsdGkiOiI3Yjc5ODZmM2QyNTk3MDZiMjVhZDRhM2NmNGM2MTcyNGNhZmQ0Yjc4NjI4MjIwNjMyZGU4NjVlM2UxNDEyMTVlIiwiYmxha2UzX3NpbmdsZSI6IjdiNzk4NmYzZDI1OTcwNmIyNWFkNGEzY2Y0YzYxNzI0Y2FmZDRiNzg2MjgyMjA2MzJkZTg2NWUzZTE0MTIxNWUiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiNzdlZmU5MzRhZGQ3YmU5Njc3NmJkODM3NWJhZDQxN2QiLCJzaGExIjoiYmM2YzYxYzgwNDgyMTE2ZTY2ZGQyNTYwNjRkYTgxYjFlY2U4NzMzOCIsInNoYTIyNCI6IjgzNzNlZGM4ZTg4Y2UxMTljODdlOTM2OTY4ZWViMWNmMzdjZGY4NTBmZjhjOTZkYjNmMDc4YmE0Iiwic2hhMjU2IjoiNzNjYWMxZWRlZmUyZjdlODFkNjRiMTI2YjIxMmY2Yzk2ZTAwNjgyNGJjZmJkZDI3Y2E5NmUyNTk5ZTQwNzUwZiIsInNoYTM4NCI6IjlmNmUwNzlmOTNiNDlkMTg1YzEyNzY0OGQwNzE3YTA0N2E3MzYyNDI4YzY4MzBhNDViNzExODAwZDE4NjIwZDZjMjcwZGE3ZmY0Y2FjOTRmNGVmZDdiZWQ5OTlkOWU0ZCIsInNoYTUxMiI6IjAwNzE5MGUyYjk5ZjVlN2Q1OGZiYWI2YTk1YmY0NjJiODhkOTg1N2NlNjY4MTMyMGJmM2M0Y2ZiZmY0MjkxZmEzNTMyMTk3YzdkODc2YWQ3NjZhOTQyOTQ2Zjc1OWY2YTViNDBlM2I2MzM3YzIwNWI0M2JkOWMyN2JiMTljNzk0IiwiYmxha2UyYiI6IjlhN2VhNTQzY2ZhMmMzMWYyZDIyNjg2MjUwNzUyNDE0Mjc1OWJiZTA0MWZlMWJkMzQzNDM1MWQwNWZlYjI2OGY2MjU0OTFlMzlmMzdkYWQ4MGM2Y2UzYTE4ZjAxNGEzZjJiMmQ2OGU2OTc0MjRmNTU2M2Y5ZjlhYzc1MzJiMjEwIiwiYmxha2UycyI6ImYxZmMwMjA0YjdjNzIwNGJlNWI1YzY3NDEyYjQ2MjY5NWE3YjFlYWQ2M2E5ZGVkMjEzYjZmYTU0NGZjNjJlYzUiLCJzaGEzXzIyNCI6IjljZDQ3YTBhMzA3NmNmYzI0NjJhNTAzMjVmMjg4ZjFiYzJjMmY2NmU2ODIxODc5NjJhNzU0NjFmIiwic2hhM18yNTYiOiI4NTFlNGI1ZDI1MWZlZTFiYzk0ODU1OWNjMDNiNjhlNTllYWU5YWI1ZTUyYjA0OTgxYTRhOTU4YWQyMDdkYjYwIiwic2hhM18zODQiOiJiZDA2ZTRhZGFlMWQ0MTJmZjFjOTcxMDJkZDFlN2JmY2UzMDViYTgxMTgyNzM3NWY5NTI4OWJkOGIyYTUxNjdiMmUyNzZjODNjNTU3ODFhMTEyMDRhNzc5MTUwMzM5ZTEiLCJzaGEzXzUxMiI6ImQ1ZGQ2OGZmZmY5NGRhZjJhMDkzZTliNmM1MTBlZmZkNThmZTA0ODMyZGQzMzEyOTZmN2NkZmYzNmRhZmQ3NGMxY2VmNjUxNTBkZjk5OGM1ODgyY2MzMzk2MTk1ZTViYjc5OTY1OGFkMTQ3MzFiMjJmZWZiMWQzNmY2MWJjYzJjIiwic2hha2VfMTI4IjoiOWJlNTgwNWMwNjg1MmZmNDUzNGQ4ZDZmODYyMmFkOTJkMGUwMWE2Y2JmYjIwN2QxOTRmM2JkYThiOGNmNWU4ZiIsInNoYWtlXzI1NiI6IjRhYjgwYjY2MzcxYzdhNjBhYWM4NDVkMTZlNWMzZDNhMmM4M2FjM2FjZDNiNTBiNzdjYWYyYTNmMWMyY2
ZjZjc5OGNjYjkxN2FjZjQzNzBmZDdjN2ZmODQ5M2Q3NGY1MWM4NGU3M2ViZGQ4MTRmM2MwMzk3YzI4ODlmNTI0Mzg3In0K",
+    "eyJibGFrZTNfbXVsdGkiOiI4ODlmYzIwMDA4NWY1NWY4YTA4MjhiODg3MDM0OTRhMGFmNWZkZGI5N2E2YmYwMDRjM2VkYTdiYzBkNDU0MjQzIiwiYmxha2UzX3NpbmdsZSI6Ijg4OWZjMjAwMDg1ZjU1ZjhhMDgyOGI4ODcwMzQ5NGEwYWY1ZmRkYjk3YTZiZjAwNGMzZWRhN2JjMGQ0NTQyNDMiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiNTIzNTRhMzkzYTVmOGNjNmMyMzQ0OThiYjcxMDljYzEiLCJzaGExIjoiMTJmYmRhOGE3ZGUwOGMwNDc2NTA5OWY2NGNmMGIzYjcxMjc1MGM1NyIsInNoYTIyNCI6IjEyZWU3N2U0Y2NhODViMDk4YjdjNWJlMWFjNGMwNzljNGM3MmJmODA2YjdlZjU1NGI0NzgxZDkxIiwic2hhMjU2IjoiMjU1NTMwZDAyYTY4MjY4OWE5ZTZjMjRhOWZhMDM2OGNhODMxZTI1OTAyYjM2NzQyNzkwZTk3NzU1ZjEzMmNmNSIsInNoYTM4NCI6IjhkMGEyMTRlNDk0NGE2NGY3ZmZjNTg3MGY0ZWUyZTA0OGIzYjRjMmQ0MGRmMWFmYTVlOGE1ZWNkN2IwOTY3M2ZjNWI5YzM5Yzg4Yjc2YmIwY2I4ZjQ1ZjAxY2MwNjZkNCIsInNoYTUxMiI6Ijg3NTM3OWNiYzdlOGYyNzU4YjVjMDY5ZTU2ZWRjODY1ODE4MGFkNDEzNGMwMzY1NzM4ZjM1YjQwYzI2M2JkMTMwMzcwZTE0MzZkNDNmOGFhMTgyMTg5MzgzMTg1ODNhOWJhYTUyYTBjMTk1Mjg5OTQzYzZiYTY2NTg1Yjg5M2ZiIiwiYmxha2UyYiI6IjBhY2MwNWEwOGE5YjhhODNmZTVjYTk4ZmExMTg3NTYwNjk0MjY0YWUxNTI4NDliYzFkNzQzNTYzMzMyMTlhYTg3N2ZiNjc4MmRjZDZiOGIyYjM1MTkyNDQzNDE2ODJiMTQ3YmY2YTY3MDU2ZWIwOTQ4MzE1M2E4Y2ZiNTNmMTI0IiwiYmxha2UycyI6ImY5ZTRhZGRlNGEzZDRhOTZhOWUyNjVjMGVmMjdmZDNiNjA0NzI1NDllMTEyMWQzOGQwMTkxNTY5ZDY5YzdhYzAiLCJzaGEzXzIyNCI6ImM0NjQ3MGRjMjkyNGI0YjZkMTA2NDY5MDRiNWM2OGVjNTU2YmQ4MTA5NmVkMTA4YjZiMzQyZmU1Iiwic2hhM18yNTYiOiIwMDBlMThiZTI1MzYxYTk0NGExZTIwNjQ5ZmY0ZGM2OGRiZTk0OGNkNTYwY2I5MTFhODU1OTE3ODdkNWQ5YWYwIiwic2hhM18zODQiOiIzNDljZmVhMGUxZGE0NWZlMmYzNjJhMWFjZjI1ZTczOWNiNGQ0NDdiM2NiODUzZDVkYWNjMzU5ZmRhMWE1M2FhYWU5OTM2ZmFhZWM1NmFhZDkwMThhYjgxMTI4ZjI3N2YiLCJzaGEzXzUxMiI6ImMxNDgwNGY1YTNjNWE4ZGEyMTAyODk1YTFjZGU4MmIwNGYwZmY4OTczMTc0MmY2NDQyY2NmNzQ1OTQzYWQ5NGViOWZmMTNhZDg3YjRmODkxN2M5NmY5ZjMwZjkwYTFhYTI4OTI3OTkwMjg0ZDJhMzcyMjA0NjE4MTNiNDI0MzEyIiwic2hha2VfMTI4IjoiN2IxY2RkMWUyMzUzMzk0OTg5M2UyMmZkMTAwZmU0YjJhMTU1MDJmMTNjMTI0YzhiZDgxY2QwZDdlOWEzMGNmOCIsInNoYWtlXzI1NiI6ImI0NjMzZThhMjNkZDM0ODk0ZTIyNzc0ODYyNTE1MzVjYWFlNjkyMTdmOTQ0NTc3MzE1NTljODBjNWQ3M2
ZkOTMxZTFjMDJlZDI0Yjc3MzE3OTJjMjVlNTZhYjg3NjI4YmJiMDgxNTU0MjU2MWY5ZGI2NWE0NDk4NDFmNGQzYTU4In0K",
+    "eyJibGFrZTNfbXVsdGkiOiI2Y2M0MmU4NGRiOGQyZTliYjA4YjUxNWUwYzlmYzg2NTViNDUwNGRlZDM1MzBlZjFjNTFjZWEwOWUxYThiNGYxIiwiYmxha2UzX3NpbmdsZSI6IjZjYzQyZTg0ZGI4ZDJlOWJiMDhiNTE1ZTBjOWZjODY1NWI0NTA0ZGVkMzUzMGVmMWM1MWNlYTA5ZTFhOGI0ZjEiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiZDQwNjk3NTJhYjQ0NzFhZDliMDY3YmUxMmRjNTM2ZjYiLCJzaGExIjoiOGRjZmVlMjZjZjUyOTllMDBjN2QwZjJiZTc0NmVmMTlkZjliZGExNCIsInNoYTIyNCI6IjhjMzAzOTU3ZjI3NDNiMjUwNmQyYzIzY2VmNmU4MTQ5MTllZmE2MWM0MTFiMDk5ZmMzODc2MmRjIiwic2hhMjU2IjoiZDk3ZjQ2OWJjMWZkMjhjMjZkMjJhN2Y3ODczNzlhZmM4NjY3ZmZmM2FhYTQ5NTE4NmQyZTM4OTU2MTBjZDJmMyIsInNoYTM4NCI6IjY0NmY0YWM0ZDA2YWJkZmE2MDAwN2VjZWNiOWNjOTk4ZmJkOTBiYzYwMmY3NTk2M2RhZDUzMGMzNGE5ZGE1YzY4NjhlMGIwMDJkZDNlMTM4ZjhmMjA2ODcyNzFkMDVjMSIsInNoYTUxMiI6ImYzZTU4NTA0YzYyOGUwYjViNzBhOTYxYThmODA1MDA1NjQ1M2E5NDlmNTgzNDhiYTNhZTVlMjdkNDRhNGJkMjc5ZjA3MmU1OGQ5YjEyOGE1NDc1MTU2ZmM3YzcxMGJkYjI3OWQ5OGFmN2EwYTI4Y2Y1ZDY2MmQxODY4Zjg3ZjI3IiwiYmxha2UyYiI6ImFhNjgyYmJjM2U1ZGRjNDZkNWUxN2VjMzRlNmEzZGY5ZjhiNWQyNzk0YTZkNmY0M2VjODMxZjhjOTU2OGYyY2RiOGE4YjAyNTE4MDA4YmY0Y2FhYTlhY2FhYjNkNzRmZmRiNGZlNDgwOTcwODU3OGJiZjNlNzJjYTc5ZDQwYzZmIiwiYmxha2UycyI6ImQ0ZGJlZTJkMmZlNDMwOGViYTkwMTY1MDdmMzI1ZmJiODZlMWQzNDQ0MjgzNzRlMjAwNjNiNWQ1MzkzZTExNjMiLCJzaGEzXzIyNCI6ImE1ZTM5NWZlNGRlYjIyY2JhNjgwMWFiZTliZjljMjM2YmMzYjkwZDdiN2ZjMTRhZDhjZjQ0NzBlIiwic2hhM18yNTYiOiIwOWYwZGVjODk0OWEzYmQzYzU3N2RjYzUyMTMwMGRiY2UwMjVjM2VjOTJkNzQ0MDJkNTE1ZDA4NTQwODg2NGY1Iiwic2hhM18zODQiOiJmMjEyNmM5NTcxODQ3NDZmNjYyMjE4MTRkMDZkZWQ3NDBhYWU3MDA4MTc0YjI0OTEzY2YwOTQzY2IwMTA5Y2QxNWI4YmMwOGY1YjUwMWYwYzhhOTY4MzUwYzgzY2I1ZWUiLCJzaGEzXzUxMiI6ImU1ZmEwMzIwMzk2YTJjMThjN2UxZjVlZmJiODYwYTU1M2NlMTlkMDQ0MWMxNWEwZTI1M2RiNjJkM2JmNjg0ZDI1OWIxYmQ4OTJkYTcyMDVjYTYyODQ2YzU0YWI1ODYxOTBmNDUxZDlmZmNkNDA5YmU5MzlhNWM1YWIyZDdkM2ZkIiwic2hha2VfMTI4IjoiNGI2MTllM2I4N2U1YTY4OTgxMjk0YzgzMmU0NzljZGI4MWFmODdlZTE4YzM1Zjc5ZjExODY5ZWEzNWUxN2I3MiIsInNoYWtlXzI1NiI6ImYzOWVkNmMxZmQ2NzVmMDg3ODAyYTc4ZTUwYWFkN2ZiYTZiM2QxNzhlZWYzMjRkMTI3ZTZjYmEwMGRjNz
kwNTkxNjQ1Y2U1Y2NmMjhjYzVkNWRkODU1OWIzMDMxYTM3ZjE5NjhmYmFhNDQzMmI2ZWU0Yzg3ZWE2YTdkMmE2NWM2In0K",
+    "eyJibGFrZTNfbXVsdGkiOiJhNDRiZjJkMzVkZDI3OTZlZTI1NmY0MzVkODFhNTdhOGM0MjZhMzM5ZDc3NTVkMmNiMjdmMzU4ZjM0NTM4OWM2IiwiYmxha2UzX3NpbmdsZSI6ImE0NGJmMmQzNWRkMjc5NmVlMjU2ZjQzNWQ4MWE1N2E4YzQyNmEzMzlkNzc1NWQyY2IyN2YzNThmMzQ1Mzg5YzYiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiOGU5OTMzMzEyZjg4NDY4MDg0ZmRiZWNjNDYyMTMxZTgiLCJzaGExIjoiNmI0MmZjZDFmMmQyNzUwYWNkY2JkMTUzMmQ4NjQ5YTM1YWI2NDYzNCIsInNoYTIyNCI6ImQ2Y2E2OTUxNzIzZjdjZjg0NzBjZWRjMmVhNjA2ODNmMWU4NDMzM2Q2NDM2MGIzOWIyMjZlZmQzIiwic2hhMjU2IjoiMDAxNGY5Yzg0YjcwMTFhMGJkNzliNzU0NGVjNzg4NDQzNWQ4ZGY0NmRjMDBiNDk0ZmFkYzA4NWQzNDM1NjI4MyIsInNoYTM4NCI6IjMxODg2OTYxODc4NWY3MWJlM2RlZjkyZDgyNzY2NjBhZGE0MGViYTdkMDk1M2Y0YTc5ODdlMThhNzFlNjBlY2EwY2YyM2YwMjVhMmQ4ZjUyMmNkZGY3MTcxODFhMTQxNSIsInNoYTUxMiI6IjdmZGQxN2NmOWU3ZTBhZDcwMzJjMDg1MTkyYWMxZmQ0ZmFhZjZkNWNlYzAzOTE5ZDk0MmZiZTIyNWNhNmIwZTg0NmQ4ZGI0ZjllYTQ5MjJlMTdhNTg4MTY4YzExMTM1NWZiZDQ1NTlmMmU5NDcwNjAwZWE1MzBhMDdiMzY0YWQwIiwiYmxha2UyYiI6IjI0ZjExZWI5M2VlN2YxOTI5NWZiZGU5MTczMmE0NGJkZGYxOWE1ZTQ4MWNmOWFhMjQ2M2UzNDllYjg0Mzc4ZDBkODFjNzY0YWQ1NTk1YjkxZjQzYzgxODcxNTRlYWU5NTZkY2ZjZTlkMWU2MTZjNTFkZThhZDZjZTBhODcyY2Q0IiwiYmxha2UycyI6IjVkZTUwZDUwMGYwYTBmOGRlMTEwOGE2ZmFkZGM4ODNlMTA3NmQ3MThiNmQxN2E4ZDVkMjgzZDdiNGYzZDU2OGEiLCJzaGEzXzIyNCI6IjFhNTA0OGNlYWZiYjg2ZDc4ZmNiNTI0ZTViYTc4NWQ2ZmY5NzY1ZTNlMzdhZWRjZmYxZGVjNGJhIiwic2hhM18yNTYiOiI0YjA0YjE1NTRmMzRkYTlmMjBmZDczM2IzNDg4NjE0ZWNhM2IwOWU1OTJjOGJlMmM0NjA1NjYyMWU0MjJmZDllIiwic2hhM18zODQiOiI1NjMwYjM2OGQ4MGM1YmM5MTgzM2VmNWM2YWUzOTJhNDE4NTNjYmM2MWJiNTI4ZDE4YWM1OWFjZGZiZWU1YThkMWMyZDE4MTM1ZGI2ZWQ2OTJlODFkZThmYTM3MzkxN2MiLCJzaGEzXzUxMiI6IjA2ODg4MGE1MmNiNDkzODYwZDhjOTVhOTFhZGFmZTYwZGYxODc2ZDhjYjFhNmI3NTU2ZjJjM2Y1NjFmMGYwZjMyZjZhYTA1YmVmN2FhYjQ5OWEwNTM0Zjk0Njc4MDEzODlmNDc0ODFiNzcxMjdjMDFiOGFhOTY4NGJhZGUzYmY2Iiwic2hha2VfMTI4IjoiODlmYTdjNDcwNGI4NGZkMWQ1M2E0MTBlN2ZjMzU3NWRhNmUxMGU1YzkzMjM1NWYyZWEyMWM4NDVhZDBlM2UxOCIsInNoYWtlXzI1NiI6IjE4NGNlMWY2NjdmYmIyODA5NWJhZmVkZTQzNTUzZjhkYzBhNGY1MDQwYWJlMjcxMzkzMzcwNDEyZWFiZT
g0ZGJhNjI0Y2ZiZWE4YzUxZDU2YzkwMTM2Mjg2ODgyZmQ0Y2E3MzA3NzZjNWUzODFlYzI5MWYxYTczOTE1MDkyMTFmIn0K",
+    "eyJibGFrZTNfbXVsdGkiOiJhYjA2YjNmMDliNTExOTAzMTMzMzY5NDE2MTc4ZDk2ZjlkYTc3ZGEwOTgyNDJmN2VlMTVjNTNhNTRkMDZhNWVmIiwiYmxha2UzX3NpbmdsZSI6ImFiMDZiM2YwOWI1MTE5MDMxMzMzNjk0MTYxNzhkOTZmOWRhNzdkYTA5ODI0MmY3ZWUxNWM1M2E1NGQwNmE1ZWYiLCJyYW5kb20iOiJhNDQxYjE1ZmU5YTNjZjU2NjYxMTkwYTBiOTNiOWRlYzdkMDQxMjcyODhjYzg3MjUwOTY3Y2YzYjUyODk0ZDExIiwibWQ1IjoiZWY0MjcxYjU3NTQwMjU4NGQ2OTI5ZWJkMGI3Nzk5NzYiLCJzaGExIjoiMzgzNzliYWQzZjZiZjc4MmM4OTgzOGY3YWVkMzRkNDNkMzNlYWM2MSIsInNoYTIyNCI6ImQ5ZDNiMjJkYmZlY2M1NTdlODAzNjg5M2M3ZWE0N2I0NTQzYzM2NzZhMDk4NzMxMzRhNjQ0OWEwIiwic2hhMjU2IjoiMjYxZGI3NmJlMGYxMzdlZWJkYmI5OGRlYWM0ZjcyMDdiOGUxMjdiY2MyZmMwODI5OGVjZDczYjQ3MjYxNjQ1NiIsInNoYTM4NCI6IjMzMjkwYWQxYjlhMmRkYmU0ODY3MWZiMTIxNDdiZWJhNjI4MjA1MDcwY2VkNjNiZTFmNGU5YWRhMjgwYWU2ZjZjNDkzYTY2MDllMGQ2YTIzMWU2ODU5ZmIyNGZhM2FjMCIsInNoYTUxMiI6IjAzMDZhMWI1NmNiYTdjNjJiNTNmNTk4MTAwMTQ3MDQ5ODBhNGRmZTdjZjQ5NTU4ZmMyMmQxZDczZDc5NzJmZTllODk2ZWRjMmEyYTQxYWVjNjRjZjkwZGUwYjI1NGM0MDBlZTU1YzcwZjk3OGVlMzk5NmM2YzhkNTBjYTI4YTdiIiwiYmxha2UyYiI6IjY1MDZhMDg1YWQ5MGZkZjk2NGJmMGE5NTFkZmVkMTllZTc0NGVjY2EyODQzZjQzYTI5NmFjZDM0M2RiODhhMDNlNTlkNmFmMGM1YWJkNTEzMzc4MTQ5Yjg3OTExMTVmODRmMDIyZWM1M2JmNGFjNDZhZDczNWIwMmJlYTM0MDk5IiwiYmxha2UycyI6IjdlZDQ3ZWQxOTg3MTk0YWFmNGIwMjQ3MWFkNTMyMmY3NTE3ZjI0OTcwMDc2Y2NmNDkzMWI0MzYxMDU1NzBlNDAiLCJzaGEzXzIyNCI6Ijk2MGM4MDExOTlhMGUzYWExNjdiNmU2MWVkMzE2ZDUzMDM2Yjk4M2UyOThkNWI5MjZmMDc3NDlhIiwic2hhM18yNTYiOiIzYzdmYWE1ZDE3Zjk2MGYxOTI2ZjNlNGIyZjc1ZjdiOWIyZDQ4NGFhNmEwM2ViOWNlMTI4NmM2OTE2YWEyM2RlIiwic2hhM18zODQiOiI5Y2Y0NDA1NWFjYzFlYjZmMDY1YjRjODcxYTYzNTM1MGE1ZjY0ODQwM2YwYTU0MWEzYzZhNjI3N2ViZjZmYTNjYmM1YmJiNjQwMDE4OGFlMWIxMTI2OGZmMDJiMzYzZDUiLCJzaGEzXzUxMiI6ImEyZDk3ZDRlYjYxM2UwZDViYTc2OTk2MzE2MzcxOGEwNDIxZDkxNTNiNjllYjM5MDRmZjI4ODRhZDdjNGJiYmIwNGY2Nzc1OTA1YmQxNGI2NTJmZTQ1Njg0YmI5MTQ3ZjBkYWViZjAxZjIzY2MzZDhkMjIzMTE0MGUzNjI4NTE5Iiwic2hha2VfMTI4IjoiNjkwMWMwYjg1MTg5ZTkyNTJiODI3MTc5NjE2MjRlMTM0MDQ1ZjlkMmI5MzM0MzVkM2Y0OThiZWIyN2Q3N2JiNSIsInNoYWtlXzI1NiI6ImIwMjA4ZTFkNDVjZWI0ODdiZDUwNzk3MWJiNWI3MjdjN2UyYmE3ZDliNWM2ZTEyYWE5YTNhOTY5YzcyND
RjODIwZDcyNDY1ODhlZWU3Yjk4ZWM1NzhjZWIxNjc3OTkxODljMWRkMmZkMmZmYWM4MWExZDAzZDFiNjMxOGRkMjBiIn0K",
+]
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -31,6 +31,7 @@ from typing_extensions import Annotated, Any, Dict

 from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES
 from invokeai.app.util.misc import uuid_string
+from invokeai.backend.model_hash.hash_validator import validate_hash

 from ..raw_model import RawModel

@@ -448,4 +449,6 @@ class ModelConfigFactory(object):
            model.key = key
        if isinstance(model, CheckpointConfigBase) and timestamp is not None:
            model.converted_at = timestamp
+        if model:
+            validate_hash(model.hash)
        return model  # type: ignore
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -285,9 +285,9 @@ class ModelCache(ModelCacheBase[AnyModel]):
                else:
                    new_dict: Dict[str, torch.Tensor] = {}
                    for k, v in cache_entry.state_dict.items():
-                        new_dict[k] = v.to(torch.device(target_device), copy=True)
+                        new_dict[k] = v.to(torch.device(target_device), copy=True, non_blocking=True)
                    cache_entry.model.load_state_dict(new_dict, assign=True)
-            cache_entry.model.to(target_device)
+            cache_entry.model.to(target_device, non_blocking=True)
            cache_entry.device = target_device
        except Exception as e:  # blow away cache entry
            self._delete_cache_entry(cache_entry)
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -10,7 +10,7 @@ from picklescan.scanner import scan_file_path
 import invokeai.backend.util.logging as logger
 from invokeai.app.util.misc import uuid_string
 from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
-from invokeai.backend.util.util import SilenceWarnings
+from invokeai.backend.util.silence_warnings import SilenceWarnings

 from .config import (
    AnyModelConfig,
--- a/invokeai/backend/model_patcher.py
+++ b/invokeai/backend/model_patcher.py
@@ -67,7 +67,7 @@ class ModelPatcher:
        unet: UNet2DConditionModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        model_state_dict: Optional[Dict[str, torch.Tensor]] = None,
-    ) -> None:
+    ) -> Generator[None, None, None]:
        with cls.apply_lora(
            unet,
            loras=loras,
@@ -83,7 +83,7 @@ class ModelPatcher:
        text_encoder: CLIPTextModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        model_state_dict: Optional[Dict[str, torch.Tensor]] = None,
-    ) -> None:
+    ) -> Generator[None, None, None]:
        with cls.apply_lora(text_encoder, loras=loras, prefix="lora_te_", model_state_dict=model_state_dict):
            yield

@@ -95,7 +95,7 @@ class ModelPatcher:
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        prefix: str,
        model_state_dict: Optional[Dict[str, torch.Tensor]] = None,
-    ) -> Generator[Any, None, None]:
+    ) -> Generator[None, None, None]:
        """
        Apply one or more LoRAs to a model.

@@ -139,12 +139,12 @@ class ModelPatcher:
                        # We intentionally move to the target device first, then cast. Experimentally, this was found to
                        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                        # same thing in a single call to '.to(...)'.
-                        layer.to(device=device)
-                        layer.to(dtype=torch.float32)
+                        layer.to(device=device, non_blocking=True)
+                        layer.to(dtype=torch.float32, non_blocking=True)
                        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                        layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
-                        layer.to(device=torch.device("cpu"))
+                        layer.to(device=torch.device("cpu"), non_blocking=True)

                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                        if module.weight.shape != layer_weight.shape:
@@ -153,7 +153,7 @@ class ModelPatcher:
                            layer_weight = layer_weight.reshape(module.weight.shape)

                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
-                        module.weight += layer_weight.to(dtype=dtype)
+                        module.weight += layer_weight.to(dtype=dtype, non_blocking=True)

            yield  # wait for context manager exit

@@ -161,7 +161,7 @@ class ModelPatcher:
            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
            with torch.no_grad():
                for module_key, weight in original_weights.items():
-                    model.get_submodule(module_key).weight.copy_(weight)
+                    model.get_submodule(module_key).weight.copy_(weight, non_blocking=True)

    @classmethod
    @contextmanager
--- a/invokeai/backend/onnx/onnx_runtime.py
+++ b/invokeai/backend/onnx/onnx_runtime.py
@@ -6,6 +6,7 @@ from typing import Any, List, Optional, Tuple, Union

 import numpy as np
 import onnx
+import torch
 from onnx import numpy_helper
 from onnxruntime import InferenceSession, SessionOptions, get_available_providers

@@ -188,6 +189,15 @@ class IAIOnnxRuntimeModel(RawModel):
        # return self.io_binding.copy_outputs_to_cpu()
        return self.session.run(None, inputs)

+    # compatibility with RawModel ABC
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
+    ) -> None:
+        pass  # Deliberate no-op: the arguments are accepted only to match the RawModel.to() signature.
+
    # compatability with diffusers load code
    @classmethod
    def from_pretrained(
--- a/invokeai/backend/raw_model.py
+++ b/invokeai/backend/raw_model.py
@@ -10,6 +10,20 @@ The term 'raw' was introduced to describe a wrapper around a torch.nn.Module
 that adds additional methods and attributes.
 """

+from abc import ABC, abstractmethod
+from typing import Optional

-class RawModel:
-    """Base class for 'Raw' model wrappers."""
+import torch
+
+
+class RawModel(ABC):
+    """Abstract base class for 'Raw' model wrappers. Concrete subclasses must implement `to()`."""
+
+    @abstractmethod
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
+    ) -> None:
+        pass  # Abstract: no shared behavior; each wrapper decides how (or whether) to act on these arguments.
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -10,12 +10,11 @@ import PIL.Image
 import psutil
 import torch
 import torchvision.transforms as T
-from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.models.controlnet import ControlNetModel
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
-from diffusers.schedulers import KarrasDiffusionSchedulers
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
+from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
 from diffusers.utils.import_utils import is_xformers_available
 from pydantic import Field
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
@@ -26,6 +25,7 @@ from invokeai.backend.stable_diffusion.diffusion.shared_invokeai_diffusion impor
 from invokeai.backend.stable_diffusion.diffusion.unet_attention_patcher import UNetAttentionPatcher, UNetIPAdapterData
 from invokeai.backend.util.attention import auto_detect_slice_size
 from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.hotfixes import ControlNetModel


@dataclass
@@ -38,56 +38,18 @@ class PipelineIntermediateState:
    predicted_original: Optional[torch.Tensor] = None


-@dataclass
-class AddsMaskLatents:
-    """Add the channels required for inpainting model input.
-
-    The inpainting model takes the normal latent channels as input, _plus_ a one-channel mask
-    and the latent encoding of the base image.
-
-    This class assumes the same mask and base image should apply to all items in the batch.
-    """
-
-    forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]
-    mask: torch.Tensor
-    initial_image_latents: torch.Tensor
-
-    def __call__(
-        self,
-        latents: torch.Tensor,
-        t: torch.Tensor,
-        text_embeddings: torch.Tensor,
-        **kwargs,
-    ) -> torch.Tensor:
-        model_input = self.add_mask_channels(latents)
-        return self.forward(model_input, t, text_embeddings, **kwargs)
-
-    def add_mask_channels(self, latents):
-        batch_size = latents.size(0)
-        # duplicate mask and latents for each batch
-        mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
-        image_latents = einops.repeat(self.initial_image_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
-        # add mask and image as additional channels
-        model_input, _ = einops.pack([latents, mask, image_latents], "b * h w")
-        return model_input
-
-
-def are_like_tensors(a: torch.Tensor, b: object) -> bool:
-    return isinstance(b, torch.Tensor) and (a.size() == b.size())
-
-
@dataclass
 class AddsMaskGuidance:
-    mask: torch.FloatTensor
-    mask_latents: torch.FloatTensor
+    mask: torch.Tensor
+    mask_latents: torch.Tensor
    scheduler: SchedulerMixin
    noise: torch.Tensor
-    gradient_mask: bool
+    is_gradient_mask: bool

    def __call__(self, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        return self.apply_mask(latents, t)

-    def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor:
+    def apply_mask(self, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        batch_size = latents.size(0)
        mask = einops.repeat(self.mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
        if t.dim() == 0:
@@ -100,7 +62,7 @@ class AddsMaskGuidance:
        # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
        # mask_latents = self.scheduler.scale_model_input(mask_latents, t)
        mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
-        if self.gradient_mask:
+        if self.is_gradient_mask:
            threshhold = (t.item()) / self.scheduler.config.num_train_timesteps
            mask_bool = mask > threshhold  # I don't know when mask got inverted, but it did
            masked_input = torch.where(mask_bool, latents, mask_latents)
@@ -200,7 +162,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        safety_checker: Optional[StableDiffusionSafetyChecker],
        feature_extractor: Optional[CLIPFeatureExtractor],
        requires_safety_checker: bool = False,
-        control_model: ControlNetModel = None,
    ):
        super().__init__(
            vae=vae,
@@ -214,8 +175,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        )

        self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
-        self.control_model = control_model
-        self.use_ip_adapter = False

    def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
        """
@@ -280,116 +239,131 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
    def to(self, torch_device: Optional[Union[str, torch.device]] = None, silence_dtype_warnings=False):
        raise Exception("Should not be called")

+    def add_inpainting_channels_to_latents(
+        self, latents: torch.Tensor, masked_ref_image_latents: torch.Tensor, inpainting_mask: torch.Tensor
+    ) -> torch.Tensor:
+        """Append the mask and masked-reference-image channels that an inpainting UNet expects to `latents`.
+
+        Standard (non-inpainting) SD UNet models expect an input with shape (N, 4, H, W). Inpainting models expect an
+        input of shape (N, 9, H, W). The 9 channels are defined as follows:
+        - Channel 0-3: The latents being denoised.
+        - Channel 4: The mask indicating which parts of the image are being inpainted.
+        - Channel 5-8: The latent representation of the masked reference image being inpainted.
+
+        The same mask and reference image are applied to every item in the batch, so `masked_ref_image_latents` must
+        have shape (1, 4, H, W) and `inpainting_mask` shape (1, 1, H, W). Returns the (N, 9, H, W) UNet input.
+        """
+        # Validate input shapes. torch.Size is a tuple subclass and never equals a list, so convert before comparing.
+        batch_size, latent_channels, latent_height, latent_width = latents.shape
+        assert latent_channels == 4
+        assert list(masked_ref_image_latents.shape) == [1, 4, latent_height, latent_width]
+        assert list(inpainting_mask.shape) == [1, 1, latent_height, latent_width]
+
+        # Broadcast the single reference image and mask across the batch dimension of `latents`.
+        ref_image_latents = masked_ref_image_latents.expand(batch_size, -1, -1, -1)
+        inpainting_mask = inpainting_mask.expand(batch_size, -1, -1, -1)
+        # Concatenate along the channel dimension.
+        return torch.cat([latents, inpainting_mask, ref_image_latents], dim=1)
+
    def latents_from_embeddings(
        self,
        latents: torch.Tensor,
-        num_inference_steps: int,
        scheduler_step_kwargs: dict[str, Any],
        conditioning_data: TextConditioningData,
-        *,
        noise: Optional[torch.Tensor],
+        seed: int,
        timesteps: torch.Tensor,
        init_timestep: torch.Tensor,
-        additional_guidance: List[Callable] = None,
-        callback: Callable[[PipelineIntermediateState], None] = None,
-        control_data: List[ControlNetData] = None,
+        callback: Callable[[PipelineIntermediateState], None],
+        control_data: list[ControlNetData] | None = None,
        ip_adapter_data: Optional[list[IPAdapterData]] = None,
        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
        mask: Optional[torch.Tensor] = None,
        masked_latents: Optional[torch.Tensor] = None,
-        gradient_mask: Optional[bool] = False,
-        seed: int,
+        is_gradient_mask: bool = False,
    ) -> torch.Tensor:
-        if init_timestep.shape[0] == 0:
-            return latents
+        """Denoise the latents.

-        if additional_guidance is None:
-            additional_guidance = []
+        Args:
+            latents: The latent-space image to denoise.
+                - If we are inpainting, this is the initial latent image before noise has been added.
+                - If we are generating a new image, this should be initialized to zeros.
+                - In some cases, this may be a partially-noised latent image (e.g. when running the SDXL refiner).
+            scheduler_step_kwargs: kwargs forwarded to the scheduler.step() method.
+            conditioning_data: Text conditioning data.
+            noise: Noise used for two purposes:
+                1. Used by the scheduler to noise the initial `latents` before denoising.
+                2. Used to noise the `masked_latents` when inpainting.
+                `noise` should be None if the `latents` tensor has already been noised.
+            seed: The seed used to generate the noise for the denoising process.
+                HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
+                same noise used earlier in the pipeline. This should really be handled in a clearer way.
+            timesteps: The timestep schedule for the denoising process.
+            init_timestep: The first timestep in the schedule.
+                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
+                case, and remove this duplicate param.
+            callback: A callback function that is called to report progress during the denoising process.
+            control_data: ControlNet data.
+            ip_adapter_data: IP-Adapter data.
+            t2i_adapter_data: T2I-Adapter data.
+            mask: A mask indicating which parts of the image are being inpainted. The presence of mask is used to
+                determine whether we are inpainting or not. `mask` should have the same spatial dimensions as the
+                `latents` tensor.
+                TODO(ryand): Check and document the expected dtype, range, and values used to represent
+                foreground/background.
+            masked_latents: A latent-space representation of a masked inpainting reference image. This tensor is only
+                used if an *inpainting* model is being used i.e. this tensor is not used when inpainting with a standard
+                SD UNet model.
+            is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
+        """
+        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
+        # cases where denoising_start and denoising_end are set such that there are no timesteps.
+        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+            return latents

        orig_latents = latents.clone()

        batch_size = latents.shape[0]
-        batched_t = init_timestep.expand(batch_size)
+        batched_init_timestep = init_timestep.expand(batch_size)

+        # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
        if noise is not None:
+            # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
+            # full noise. Investigate the history of why this got commented out.
            # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
-            latents = self.scheduler.add_noise(latents, noise, batched_t)
+            latents = self.scheduler.add_noise(latents, noise, batched_init_timestep)

-        if mask is not None:
-            if is_inpainting_model(self.unet):
-                if masked_latents is None:
-                    raise Exception("Source image required for inpaint mask when inpaint model used!")
-
-                self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
-                    self._unet_forward, mask, masked_latents
-                )
-            else:
-                # if no noise provided, noisify unmasked area based on seed
-                if noise is None:
-                    noise = torch.randn(
-                        orig_latents.shape,
-                        dtype=torch.float32,
-                        device="cpu",
-                        generator=torch.Generator(device="cpu").manual_seed(seed),
-                    ).to(device=orig_latents.device, dtype=orig_latents.dtype)
-
-                additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise, gradient_mask))
-
-        try:
-            latents = self.generate_latents_from_embeddings(
-                latents,
-                timesteps,
-                conditioning_data,
-                scheduler_step_kwargs=scheduler_step_kwargs,
-                additional_guidance=additional_guidance,
-                control_data=control_data,
-                ip_adapter_data=ip_adapter_data,
-                t2i_adapter_data=t2i_adapter_data,
-                callback=callback,
-            )
-        finally:
-            self.invokeai_diffuser.model_forward_callback = self._unet_forward
-
-        # restore unmasked part after the last step is completed
-        # in-process masking happens before each step
-        if mask is not None:
-            if gradient_mask:
-                latents = torch.where(mask > 0, latents, orig_latents)
-            else:
-                latents = torch.lerp(
-                    orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
-                )
-
-        return latents
-
-    def generate_latents_from_embeddings(
-        self,
-        latents: torch.Tensor,
-        timesteps,
-        conditioning_data: TextConditioningData,
-        scheduler_step_kwargs: dict[str, Any],
-        *,
-        additional_guidance: List[Callable] = None,
-        control_data: List[ControlNetData] = None,
-        ip_adapter_data: Optional[list[IPAdapterData]] = None,
-        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
-        callback: Callable[[PipelineIntermediateState], None] = None,
-    ) -> torch.Tensor:
        self._adjust_memory_efficient_attention(latents)
-        if additional_guidance is None:
-            additional_guidance = []

-        batch_size = latents.shape[0]
+        # Handle mask guidance (a.k.a. inpainting).
+        mask_guidance: AddsMaskGuidance | None = None
+        if mask is not None and not is_inpainting_model(self.unet):
+            # We are doing inpainting, since a mask is provided, but we are not using an inpainting model, so we will
+            # apply mask guidance to the latents.

-        if timesteps.shape[0] == 0:
-            return latents
+            # 'noise' might be None if the latents have already been noised (e.g. when running the SDXL refiner).
+            # We still need noise for inpainting, so we generate it from the seed here.
+            if noise is None:
+                noise = torch.randn(
+                    orig_latents.shape,
+                    dtype=torch.float32,
+                    device="cpu",
+                    generator=torch.Generator(device="cpu").manual_seed(seed),
+                ).to(device=orig_latents.device, dtype=orig_latents.dtype)
+
+            mask_guidance = AddsMaskGuidance(
+                mask=mask,
+                mask_latents=orig_latents,
+                scheduler=self.scheduler,
+                noise=noise,
+                is_gradient_mask=is_gradient_mask,
+            )

        use_ip_adapter = ip_adapter_data is not None
        use_regional_prompting = (
            conditioning_data.cond_regions is not None or conditioning_data.uncond_regions is not None
        )
        unet_attention_patcher = None
-        self.use_ip_adapter = use_ip_adapter
        attn_ctx = nullcontext()

        if use_ip_adapter or use_regional_prompting:
@@ -402,28 +376,28 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
            attn_ctx = unet_attention_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model)

        with attn_ctx:
-            if callback is not None:
-                callback(
-                    PipelineIntermediateState(
-                        step=-1,
-                        order=self.scheduler.order,
-                        total_steps=len(timesteps),
-                        timestep=self.scheduler.config.num_train_timesteps,
-                        latents=latents,
-                    )
+            callback(
+                PipelineIntermediateState(
+                    step=-1,
+                    order=self.scheduler.order,
+                    total_steps=len(timesteps),
+                    timestep=self.scheduler.config.num_train_timesteps,
+                    latents=latents,
                )
+            )

-            # print("timesteps:", timesteps)
            for i, t in enumerate(self.progress_bar(timesteps)):
                batched_t = t.expand(batch_size)
                step_output = self.step(
-                    batched_t,
-                    latents,
-                    conditioning_data,
+                    t=batched_t,
+                    latents=latents,
+                    conditioning_data=conditioning_data,
                    step_index=i,
                    total_step_count=len(timesteps),
                    scheduler_step_kwargs=scheduler_step_kwargs,
-                    additional_guidance=additional_guidance,
+                    mask_guidance=mask_guidance,
+                    mask=mask,
+                    masked_latents=masked_latents,
                    control_data=control_data,
                    ip_adapter_data=ip_adapter_data,
                    t2i_adapter_data=t2i_adapter_data,
@@ -431,19 +405,28 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                latents = step_output.prev_sample
                predicted_original = getattr(step_output, "pred_original_sample", None)

-                if callback is not None:
-                    callback(
-                        PipelineIntermediateState(
-                            step=i,
-                            order=self.scheduler.order,
-                            total_steps=len(timesteps),
-                            timestep=int(t),
-                            latents=latents,
-                            predicted_original=predicted_original,
-                        )
+                callback(
+                    PipelineIntermediateState(
+                        step=i,
+                        order=self.scheduler.order,
+                        total_steps=len(timesteps),
+                        timestep=int(t),
+                        latents=latents,
+                        predicted_original=predicted_original,
                    )
+                )

-            return latents
+        # restore unmasked part after the last step is completed
+        # in-process masking happens before each step
+        if mask is not None:
+            if is_gradient_mask:
+                latents = torch.where(mask > 0, latents, orig_latents)
+            else:
+                latents = torch.lerp(
+                    orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
+                )
+
+        return latents

    @torch.inference_mode()
    def step(
@@ -454,19 +437,20 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        step_index: int,
        total_step_count: int,
        scheduler_step_kwargs: dict[str, Any],
-        additional_guidance: List[Callable] = None,
-        control_data: List[ControlNetData] = None,
+        mask_guidance: AddsMaskGuidance | None,
+        mask: torch.Tensor | None,
+        masked_latents: torch.Tensor | None,
+        control_data: list[ControlNetData] | None = None,
        ip_adapter_data: Optional[list[IPAdapterData]] = None,
        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
    ):
        # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
        timestep = t[0]
-        if additional_guidance is None:
-            additional_guidance = []

-        # one day we will expand this extension point, but for now it just does denoise masking
-        for guidance in additional_guidance:
-            latents = guidance(latents, timestep)
+        # Handle masked image-to-image (a.k.a. inpainting).
+        if mask_guidance is not None:
+            # NOTE: This is intentionally done *before* self.scheduler.scale_model_input(...).
+            latents = mask_guidance(latents, timestep)

        # TODO: should this scaling happen here or inside self._unet_forward?
        #     i.e. before or after passing it to InvokeAIDiffuserComponent
@@ -514,6 +498,31 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):

            down_intrablock_additional_residuals = accum_adapter_state

+        # Handle inpainting models.
+        if is_inpainting_model(self.unet):
+            # NOTE: These calls to add_inpainting_channels_to_latents(...) are intentionally done *after*
+            # self.scheduler.scale_model_input(...) so that the scaling is not applied to the mask or reference image
+            # latents.
+            if mask is not None:
+                if masked_latents is None:
+                    raise ValueError("Source image required for inpaint mask when inpaint model used!")
+                latent_model_input = self.add_inpainting_channels_to_latents(
+                    latents=latent_model_input, masked_ref_image_latents=masked_latents, inpainting_mask=mask
+                )
+            else:
+                # We are using an inpainting model, but no mask was provided, so we are not really "inpainting".
+                # We generate a global mask and empty original image so that we can still generate in this
+                # configuration.
+                # TODO(ryand): Should we just raise an exception here instead? I can't think of a use case for wanting
+                # to do this.
+                # TODO(ryand): If we decide that there is a good reason to keep this, then we should generate the 'fake'
+                # mask and original image once rather than on every denoising step.
+                latent_model_input = self.add_inpainting_channels_to_latents(
+                    latents=latent_model_input,
+                    masked_ref_image_latents=torch.zeros_like(latent_model_input[:1]),
+                    inpainting_mask=torch.ones_like(latent_model_input[:1, :1]),
+                )
+
        uc_noise_pred, c_noise_pred = self.invokeai_diffuser.do_unet_step(
            sample=latent_model_input,
            timestep=t,  # TODO: debug how handled batched and non batched timesteps
@@ -542,17 +551,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        # compute the previous noisy sample x_t -> x_t-1
        step_output = self.scheduler.step(noise_pred, timestep, latents, **scheduler_step_kwargs)

-        # TODO: discuss injection point options. For now this is a patch to get progress images working with inpainting again.
-        for guidance in additional_guidance:
-            # apply the mask to any "denoised" or "pred_original_sample" fields
+        # TODO: discuss injection point options. For now this is a patch to get progress images working with inpainting
+        # again.
+        if mask_guidance is not None:
+            # Apply the mask to any "denoised" or "pred_original_sample" fields.
            if hasattr(step_output, "denoised"):
-                step_output.pred_original_sample = guidance(step_output.denoised, self.scheduler.timesteps[-1])
+                step_output.pred_original_sample = mask_guidance(step_output.denoised, self.scheduler.timesteps[-1])
            elif hasattr(step_output, "pred_original_sample"):
-                step_output.pred_original_sample = guidance(
+                step_output.pred_original_sample = mask_guidance(
                    step_output.pred_original_sample, self.scheduler.timesteps[-1]
                )
            else:
-                step_output.pred_original_sample = guidance(latents, self.scheduler.timesteps[-1])
+                step_output.pred_original_sample = mask_guidance(latents, self.scheduler.timesteps[-1])

        return step_output

@@ -575,17 +585,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        **kwargs,
    ):
        """predict the noise residual"""
-        if is_inpainting_model(self.unet) and latents.size(1) == 4:
-            # Pad out normal non-inpainting inputs for an inpainting model.
-            # FIXME: There are too many layers of functions and we have too many different ways of
-            #     overriding things! This should get handled in a way more consistent with the other
-            #     use of AddsMaskLatents.
-            latents = AddsMaskLatents(
-                self._unet_forward,
-                mask=torch.ones_like(latents[:1, :1], device=latents.device, dtype=latents.dtype),
-                initial_image_latents=torch.zeros_like(latents[:1], device=latents.device, dtype=latents.dtype),
-            ).add_mask_channels(latents)
-
        # First three args should be positional, not keywords, so torch hooks can see them.
        return self.unet(
            latents,
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+
+import copy
+from dataclasses import dataclass
+from typing import Any, Callable, Optional
+
+import torch
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+
+from invokeai.backend.stable_diffusion.diffusers_pipeline import (
+    ControlNetData,
+    PipelineIntermediateState,
+    StableDiffusionGeneratorPipeline,
+)
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import TextConditioningData
+from invokeai.backend.tiles.utils import TBLR
+
+# The maximum number of regions with compatible sizes that will be batched together.
+# Larger batch sizes improve speed, but require more device memory.
+MAX_REGION_BATCH_SIZE = 4
+
+
+@dataclass
+class MultiDiffusionRegionConditioning:
+    # Region coords in latent space.
+    region: TBLR
+    text_conditioning_data: TextConditioningData
+    control_data: list[ControlNetData]
+
+
+class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
+    """A Stable Diffusion pipeline that uses Multi-Diffusion (https://arxiv.org/pdf/2302.08113) for denoising."""
+
+    def _split_into_region_batches(
+        self, multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning]
+    ) -> list[list[MultiDiffusionRegionConditioning]]:
+        # Group the regions by shape. Only regions with the same shape can be batched together.
+        conditioning_by_shape: dict[tuple[int, int], list[MultiDiffusionRegionConditioning]] = {}
+        for region_conditioning in multi_diffusion_conditioning:
+            shape_hw = (
+                region_conditioning.region.bottom - region_conditioning.region.top,
+                region_conditioning.region.right - region_conditioning.region.left,
+            )
+            # In Python, a tuple of hashable objects is itself hashable, so it can be used as a key in a dict.
+            if shape_hw not in conditioning_by_shape:
+                conditioning_by_shape[shape_hw] = []
+            conditioning_by_shape[shape_hw].append(region_conditioning)
+
+        # Split the regions into batches, respecting the MAX_REGION_BATCH_SIZE constraint.
+        region_conditioning_batches = []
+        for region_conditioning_batch in conditioning_by_shape.values():
+            for i in range(0, len(region_conditioning_batch), MAX_REGION_BATCH_SIZE):
+                region_conditioning_batches.append(region_conditioning_batch[i : i + MAX_REGION_BATCH_SIZE])
+
+        return region_conditioning_batches
+
+    def _check_regional_prompting(self, multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning]):
+        """Check the input conditioning and confirm that regional prompting is not used."""
+        for region_conditioning in multi_diffusion_conditioning:
+            if (
+                region_conditioning.text_conditioning_data.cond_regions is not None
+                or region_conditioning.text_conditioning_data.uncond_regions is not None
+            ):
+                raise NotImplementedError("Regional prompting is not yet supported in Multi-Diffusion.")
+
+    def multi_diffusion_denoise(
+        self,
+        multi_diffusion_conditioning: list[MultiDiffusionRegionConditioning],
+        latents: torch.Tensor,
+        scheduler_step_kwargs: dict[str, Any],
+        noise: Optional[torch.Tensor],
+        timesteps: torch.Tensor,
+        init_timestep: torch.Tensor,
+        callback: Callable[[PipelineIntermediateState], None],
+    ) -> torch.Tensor:
+        self._check_regional_prompting(multi_diffusion_conditioning)
+
+        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
+        # cases where denoising_start and denoising_end are set such that there are no timesteps.
+        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+            return latents
+
+        batch_size, _, latent_height, latent_width = latents.shape
+        batched_init_timestep = init_timestep.expand(batch_size)
+
+        # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
+        if noise is not None:
+            # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
+            # full noise. Investigate the history of why this got commented out.
+            # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
+            latents = self.scheduler.add_noise(latents, noise, batched_init_timestep)
+
+        # TODO(ryand): Look into the implications of passing in latents here that are larger than they will be after
+        # cropping into regions.
+        self._adjust_memory_efficient_attention(latents)
+
+        # Populate a weighted mask that will be used to combine the results from each region after every step.
+        # For now, we assume that each region has the same weight (1.0).
+        region_weight_mask = torch.zeros(
+            (1, 1, latent_height, latent_width), device=latents.device, dtype=latents.dtype
+        )
+        for region_conditioning in multi_diffusion_conditioning:
+            region = region_conditioning.region
+            region_weight_mask[:, :, region.top : region.bottom, region.left : region.right] += 1.0
+
+        # Group the region conditioning into batches for faster processing.
+        # region_conditioning_batches[b][r] is the r'th region in the b'th batch.
+        region_conditioning_batches = self._split_into_region_batches(multi_diffusion_conditioning)
+
+        # Many of the diffusers schedulers are stateful (i.e. they update internal state in each call to step()). Since
+        # we are calling step() multiple times at the same timestep (once for each region batch), we must maintain a
+        # separate scheduler state for each region batch.
+        region_batch_schedulers: list[SchedulerMixin] = [
+            copy.deepcopy(self.scheduler) for _ in region_conditioning_batches
+        ]
+
+        callback(
+            PipelineIntermediateState(
+                step=-1,
+                order=self.scheduler.order,
+                total_steps=len(timesteps),
+                timestep=self.scheduler.config.num_train_timesteps,
+                latents=latents,
+            )
+        )
+
+        for i, t in enumerate(self.progress_bar(timesteps)):
+            batched_t = t.expand(batch_size)
+
+            merged_latents = torch.zeros_like(latents)
+            merged_pred_original: torch.Tensor | None = None
+            for region_batch_idx, region_conditioning_batch in enumerate(region_conditioning_batches):
+                # Switch to the scheduler for the region batch.
+                self.scheduler = region_batch_schedulers[region_batch_idx]
+
+                # TODO(ryand): This logic has not yet been tested with input latents with a batch_size > 1.
+
+                # Prepare the latents for the region batch.
+                batch_latents = torch.cat(
+                    [
+                        latents[
+                            :,
+                            :,
+                            region_conditioning.region.top : region_conditioning.region.bottom,
+                            region_conditioning.region.left : region_conditioning.region.right,
+                        ]
+                        for region_conditioning in region_conditioning_batch
+                    ],
+                )
+
+                # TODO(ryand): Do we have to repeat the text_conditioning_data to match the batch size? Or does step()
+                # handle broadcasting properly?
+
+                # TODO(ryand): Resume here! step() below still takes one region's conditioning; total_step_count is undefined.
+                # Run the denoising step on the region.
+                step_output = self.step(
+                    t=batched_t,
+                    latents=batch_latents,
+                    conditioning_data=region_conditioning.text_conditioning_data,
+                    step_index=i,
+                    total_step_count=total_step_count,
+                    scheduler_step_kwargs=scheduler_step_kwargs,
+                    mask_guidance=None,
+                    mask=None,
+                    masked_latents=None,
+                    control_data=region_conditioning.control_data,
+                )
+                # Run a denoising step on the region.
+                # step_output = self._region_step(
+                #     region_conditioning=region_conditioning,
+                #     t=batched_t,
+                #     latents=latents,
+                #     step_index=i,
+                #     total_step_count=len(timesteps),
+                #     scheduler_step_kwargs=scheduler_step_kwargs,
+                # )
+
+                # Store the results from the region.
+                region = region_conditioning.region
+                merged_latents[:, :, region.top : region.bottom, region.left : region.right] += step_output.prev_sample
+                pred_orig_sample = getattr(step_output, "pred_original_sample", None)
+                if pred_orig_sample is not None:
+                    # If one region has pred_original_sample, then we can assume that all regions will have it, because
+                    # they all use the same scheduler.
+                    if merged_pred_original is None:
+                        merged_pred_original = torch.zeros_like(latents)
+                    merged_pred_original[:, :, region.top : region.bottom, region.left : region.right] += (
+                        pred_orig_sample
+                    )
+
+            # Normalize the merged results.
+            latents = torch.where(region_weight_mask > 0, merged_latents / region_weight_mask, merged_latents)
+            predicted_original = None
+            if merged_pred_original is not None:
+                predicted_original = torch.where(
+                    region_weight_mask > 0, merged_pred_original / region_weight_mask, merged_pred_original
+                )
+
+            callback(
+                PipelineIntermediateState(
+                    step=i,
+                    order=self.scheduler.order,
+                    total_steps=len(timesteps),
+                    timestep=int(t),
+                    latents=latents,
+                    predicted_original=predicted_original,
+                )
+            )
+
+        return latents
+
+    @torch.inference_mode()
+    def _region_batch_step(
+        self,
+        region_conditioning: MultiDiffusionRegionConditioning,
+        t: torch.Tensor,
+        latents: torch.Tensor,
+        step_index: int,
+        total_step_count: int,
+        scheduler_step_kwargs: dict[str, Any],
+    ):
+        # Crop the inputs to the region.
+        region_latents = latents[
+            :,
+            :,
+            region_conditioning.region.top : region_conditioning.region.bottom,
+            region_conditioning.region.left : region_conditioning.region.right,
+        ]
+
+        # Run the denoising step on the region.
+        return self.step(
+            t=t,
+            latents=region_latents,
+            conditioning_data=region_conditioning.text_conditioning_data,
+            step_index=step_index,
+            total_step_count=total_step_count,
+            scheduler_step_kwargs=scheduler_step_kwargs,
+            mask_guidance=None,
+            mask=None,
+            masked_latents=None,
+            control_data=region_conditioning.control_data,
+        )
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -65,6 +65,18 @@ class TextualInversionModelRaw(RawModel):

        return result

+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
+    ) -> None:
+        if not torch.cuda.is_available():
+            return
+        for emb in [self.embedding, self.embedding_2]:
+            if emb is not None:
+                emb.to(device=device, dtype=dtype, non_blocking=non_blocking)
+

 class TextualInversionManager(BaseTextualInversionManager):
    """TextualInversionManager implements the BaseTextualInversionManager ABC from the compel library."""
--- a/invokeai/backend/util/silence_warnings.py
+++ b/invokeai/backend/util/silence_warnings.py
@@ -1,29 +1,36 @@
-"""Context class to silence transformers and diffusers warnings."""
-
 import warnings
-from typing import Any
+from contextlib import ContextDecorator

-from diffusers import logging as diffusers_logging
+from diffusers.utils import logging as diffusers_logging
 from transformers import logging as transformers_logging


-class SilenceWarnings(object):
-    """Use in context to temporarily turn off warnings from transformers & diffusers modules.
+# Inherit from ContextDecorator to allow using SilenceWarnings as both a context manager and a decorator.
+class SilenceWarnings(ContextDecorator):
+    """A context manager that disables warnings from transformers & diffusers modules while active.

+    As a context manager:
+    ```
    with SilenceWarnings():
        # do something
+    ```
+
+    As a decorator:
+    ```
+    @SilenceWarnings()
+    def some_function():
+        # do something
+    ```
    """

-    def __init__(self) -> None:
-        self.transformers_verbosity = transformers_logging.get_verbosity()
-        self.diffusers_verbosity = diffusers_logging.get_verbosity()
-
    def __enter__(self) -> None:
+        self._transformers_verbosity = transformers_logging.get_verbosity()
+        self._diffusers_verbosity = diffusers_logging.get_verbosity()
        transformers_logging.set_verbosity_error()
        diffusers_logging.set_verbosity_error()
        warnings.simplefilter("ignore")

-    def __exit__(self, *args: Any) -> None:
-        transformers_logging.set_verbosity(self.transformers_verbosity)
-        diffusers_logging.set_verbosity(self.diffusers_verbosity)
+    def __exit__(self, *args) -> None:
+        transformers_logging.set_verbosity(self._transformers_verbosity)
+        diffusers_logging.set_verbosity(self._diffusers_verbosity)
        warnings.simplefilter("default")
--- a/invokeai/backend/util/util.py
+++ b/invokeai/backend/util/util.py
@@ -3,12 +3,9 @@ import io
 import os
 import re
 import unicodedata
-import warnings
 from pathlib import Path

-from diffusers import logging as diffusers_logging
 from PIL import Image
-from transformers import logging as transformers_logging

 # actual size of a gig
 GIG = 1073741824
@@ -80,21 +77,3 @@ class Chdir(object):

    def __exit__(self, *args):
        os.chdir(self.original)
-
-
-class SilenceWarnings(object):
-    """Context manager to temporarily lower verbosity of diffusers & transformers warning messages."""
-
-    def __enter__(self):
-        """Set verbosity to error."""
-        self.transformers_verbosity = transformers_logging.get_verbosity()
-        self.diffusers_verbosity = diffusers_logging.get_verbosity()
-        transformers_logging.set_verbosity_error()
-        diffusers_logging.set_verbosity_error()
-        warnings.simplefilter("ignore")
-
-    def __exit__(self, type, value, traceback):
-        """Restore logger verbosity to state before context was entered."""
-        transformers_logging.set_verbosity(self.transformers_verbosity)
-        diffusers_logging.set_verbosity(self.diffusers_verbosity)
-        warnings.simplefilter("default")
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/socketio/socketModelInstall.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/socketio/socketModelInstall.ts
@@ -5,43 +5,122 @@ import {
  socketModelInstallCancelled,
  socketModelInstallComplete,
  socketModelInstallDownloadProgress,
+  socketModelInstallDownloadsComplete,
+  socketModelInstallDownloadStarted,
  socketModelInstallError,
+  socketModelInstallStarted,
 } from 'services/events/actions';

+/**
+ * A model install has two main stages - downloading and installing. All these events are namespaced under `model_install_`
+ * which is a bit misleading. For example, a `model_install_started` event is actually fired _after_ the model has fully
+ * downloaded and is being "physically" installed.
+ *
+ * Note: the download events are only fired for remote model installs, not local.
+ *
+ * Here's the expected flow:
+ * - API receives install request, model manager preps the install
+ * - `model_install_download_started` fired when the download starts
+ * - `model_install_download_progress` fired continually until the download is complete
+ * - `model_install_downloads_complete` fired when the download is complete
+ * - `model_install_started` fired when the "physical" installation starts
+ * - `model_install_complete` fired when the installation is complete
+ * - `model_install_cancelled` fired if the installation is cancelled
+ * - `model_install_error` fired if the installation has an error
+ */
+
+const selectModelInstalls = modelsApi.endpoints.listModelInstalls.select();
+
 export const addModelInstallEventListener = (startAppListening: AppStartListening) => {
  startAppListening({
-    actionCreator: socketModelInstallDownloadProgress,
-    effect: async (action, { dispatch }) => {
-      const { bytes, total_bytes, id } = action.payload.data;
+    actionCreator: socketModelInstallDownloadStarted,
+    effect: async (action, { dispatch, getState }) => {
+      const { id } = action.payload.data;
+      const { data } = selectModelInstalls(getState());

-      dispatch(
-        modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
-          const modelImport = draft.find((m) => m.id === id);
-          if (modelImport) {
-            modelImport.bytes = bytes;
-            modelImport.total_bytes = total_bytes;
-            modelImport.status = 'downloading';
-          }
-          return draft;
-        })
-      );
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.status = 'downloading';
+            }
+            return draft;
+          })
+        );
+      }
+    },
+  });
+
+  startAppListening({
+    actionCreator: socketModelInstallStarted,
+    effect: async (action, { dispatch, getState }) => {
+      const { id } = action.payload.data;
+      const { data } = selectModelInstalls(getState());
+
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.status = 'running';
+            }
+            return draft;
+          })
+        );
+      }
+    },
+  });
+
+  startAppListening({
+    actionCreator: socketModelInstallDownloadProgress,
+    effect: async (action, { dispatch, getState }) => {
+      const { bytes, total_bytes, id } = action.payload.data;
+      const { data } = selectModelInstalls(getState());
+
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.bytes = bytes;
+              modelImport.total_bytes = total_bytes;
+              modelImport.status = 'downloading';
+            }
+            return draft;
+          })
+        );
+      }
    },
  });

  startAppListening({
    actionCreator: socketModelInstallComplete,
-    effect: (action, { dispatch }) => {
+    effect: (action, { dispatch, getState }) => {
      const { id } = action.payload.data;

-      dispatch(
-        modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
-          const modelImport = draft.find((m) => m.id === id);
-          if (modelImport) {
-            modelImport.status = 'completed';
-          }
-          return draft;
-        })
-      );
+      const { data } = selectModelInstalls(getState());
+
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.status = 'completed';
+            }
+            return draft;
+          })
+        );
+      }
+
      dispatch(api.util.invalidateTags([{ type: 'ModelConfig', id: LIST_TAG }]));
      dispatch(api.util.invalidateTags([{ type: 'ModelScanFolderResults', id: LIST_TAG }]));
    },
@@ -49,37 +128,69 @@ export const addModelInstallEventListener = (startAppListening: AppStartListenin

  startAppListening({
    actionCreator: socketModelInstallError,
-    effect: (action, { dispatch }) => {
+    effect: (action, { dispatch, getState }) => {
      const { id, error, error_type } = action.payload.data;
+      const { data } = selectModelInstalls(getState());

-      dispatch(
-        modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
-          const modelImport = draft.find((m) => m.id === id);
-          if (modelImport) {
-            modelImport.status = 'error';
-            modelImport.error_reason = error_type;
-            modelImport.error = error;
-          }
-          return draft;
-        })
-      );
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.status = 'error';
+              modelImport.error_reason = error_type;
+              modelImport.error = error;
+            }
+            return draft;
+          })
+        );
+      }
    },
  });

  startAppListening({
    actionCreator: socketModelInstallCancelled,
-    effect: (action, { dispatch }) => {
+    effect: (action, { dispatch, getState }) => {
      const { id } = action.payload.data;
+      const { data } = selectModelInstalls(getState());

-      dispatch(
-        modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
-          const modelImport = draft.find((m) => m.id === id);
-          if (modelImport) {
-            modelImport.status = 'cancelled';
-          }
-          return draft;
-        })
-      );
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.status = 'cancelled';
+            }
+            return draft;
+          })
+        );
+      }
+    },
+  });
+
+  startAppListening({
+    actionCreator: socketModelInstallDownloadsComplete,
+    effect: (action, { dispatch, getState }) => {
+      const { id } = action.payload.data;
+      const { data } = selectModelInstalls(getState());
+
+      if (!data || !data.find((m) => m.id === id)) {
+        dispatch(api.util.invalidateTags([{ type: 'ModelInstalls' }]));
+      } else {
+        dispatch(
+          modelsApi.util.updateQueryData('listModelInstalls', undefined, (draft) => {
+            const modelImport = draft.find((m) => m.id === id);
+            if (modelImport) {
+              modelImport.status = 'downloads_done';
+            }
+            return draft;
+          })
+        );
+      }
    },
  });
 };
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -123,6 +123,13 @@ export type paths = {
     */
    delete: operations["prune_model_install_jobs"];
  };
+  "/api/v2/models/install/huggingface": {
+    /**
+     * Install Hugging Face Model
+     * @description Install a Hugging Face model using a string identifier.
+     */
+    get: operations["install_hugging_face_model"];
+  };
  "/api/v2/models/install/{id}": {
    /**
     * Get Model Install Job
@@ -3788,23 +3795,6 @@ export type components = {
     * @description Class to monitor and control a model download request.
     */
    DownloadJob: {
-      /**
-       * Source
-       * Format: uri
-       * @description Where to download from. Specific types specified in child classes.
-       */
-      source: string;
-      /**
-       * Dest
-       * Format: path
-       * @description Destination of downloaded model on local disk; a directory or file path
-       */
-      dest: string;
-      /**
-       * Access Token
-       * @description authorization token for protected resources
-       */
-      access_token?: string | null;
      /**
       * Id
       * @description Numeric ID of this job
@@ -3812,36 +3802,21 @@ export type components = {
       */
      id?: number;
      /**
-       * Priority
-       * @description Queue priority; lower values are higher priority
-       * @default 10
+       * Dest
+       * Format: path
+       * @description Initial destination of downloaded model on local disk; a directory or file path
       */
-      priority?: number;
+      dest: string;
+      /**
+       * Download Path
+       * @description Final location of downloaded file or directory
+       */
+      download_path?: string | null;
      /**
       * @description Status of the download
       * @default waiting
       */
      status?: components["schemas"]["DownloadJobStatus"];
-      /**
-       * Download Path
-       * @description Final location of downloaded file
-       */
-      download_path?: string | null;
-      /**
-       * Job Started
-       * @description Timestamp for when the download job started
-       */
-      job_started?: string | null;
-      /**
-       * Job Ended
-       * @description Timestamp for when the download job ende1d (completed or errored)
-       */
-      job_ended?: string | null;
-      /**
-       * Content Type
-       * @description Content type of downloaded file
-       */
-      content_type?: string | null;
      /**
       * Bytes
       * @description Bytes downloaded so far
@@ -3864,6 +3839,38 @@ export type components = {
       * @description Traceback of the exception that caused an error
       */
      error?: string | null;
+      /**
+       * Source
+       * Format: uri
+       * @description Where to download from. Specific types specified in child classes.
+       */
+      source: string;
+      /**
+       * Access Token
+       * @description authorization token for protected resources
+       */
+      access_token?: string | null;
+      /**
+       * Priority
+       * @description Queue priority; lower values are higher priority
+       * @default 10
+       */
+      priority?: number;
+      /**
+       * Job Started
+       * @description Timestamp for when the download job started
+       */
+      job_started?: string | null;
+      /**
+       * Job Ended
+       * @description Timestamp for when the download job ended (completed or errored)
+       */
+      job_ended?: string | null;
+      /**
+       * Content Type
+       * @description Content type of downloaded file
+       */
+      content_type?: string | null;
    };
    /**
     * DownloadJobStatus
@@ -7276,144 +7283,144 @@ export type components = {
      project_id: string | null;
    };
    InvocationOutputMap: {
-      pidi_image_processor: components["schemas"]["ImageOutput"];
-      image_mask_to_tensor: components["schemas"]["MaskOutput"];
-      vae_loader: components["schemas"]["VAEOutput"];
-      collect: components["schemas"]["CollectInvocationOutput"];
-      string_join_three: components["schemas"]["StringOutput"];
-      content_shuffle_image_processor: components["schemas"]["ImageOutput"];
-      random_range: components["schemas"]["IntegerCollectionOutput"];
-      ip_adapter: components["schemas"]["IPAdapterOutput"];
-      step_param_easing: components["schemas"]["FloatCollectionOutput"];
-      core_metadata: components["schemas"]["MetadataOutput"];
-      main_model_loader: components["schemas"]["ModelLoaderOutput"];
-      leres_image_processor: components["schemas"]["ImageOutput"];
-      calculate_image_tiles_even_split: components["schemas"]["CalculateImageTilesOutput"];
-      color_correct: components["schemas"]["ImageOutput"];
-      calculate_image_tiles: components["schemas"]["CalculateImageTilesOutput"];
-      float_range: components["schemas"]["FloatCollectionOutput"];
-      infill_cv2: components["schemas"]["ImageOutput"];
-      img_channel_multiply: components["schemas"]["ImageOutput"];
-      img_pad_crop: components["schemas"]["ImageOutput"];
-      sdxl_refiner_compel_prompt: components["schemas"]["ConditioningOutput"];
-      face_mask_detection: components["schemas"]["FaceMaskOutput"];
-      infill_lama: components["schemas"]["ImageOutput"];
-      mask_combine: components["schemas"]["ImageOutput"];
-      sdxl_compel_prompt: components["schemas"]["ConditioningOutput"];
-      segment_anything_processor: components["schemas"]["ImageOutput"];
-      merge_metadata: components["schemas"]["MetadataOutput"];
-      img_ilerp: components["schemas"]["ImageOutput"];
-      heuristic_resize: components["schemas"]["ImageOutput"];
-      cv_inpaint: components["schemas"]["ImageOutput"];
-      div: components["schemas"]["IntegerOutput"];
-      pair_tile_image: components["schemas"]["PairTileImageOutput"];
-      float_math: components["schemas"]["FloatOutput"];
-      img_channel_offset: components["schemas"]["ImageOutput"];
-      canvas_paste_back: components["schemas"]["ImageOutput"];
-      canny_image_processor: components["schemas"]["ImageOutput"];
-      integer_collection: components["schemas"]["IntegerCollectionOutput"];
-      freeu: components["schemas"]["UNetOutput"];
-      lresize: components["schemas"]["LatentsOutput"];
-      range_of_size: components["schemas"]["IntegerCollectionOutput"];
-      depth_anything_image_processor: components["schemas"]["ImageOutput"];
-      float_to_int: components["schemas"]["IntegerOutput"];
-      rand_int: components["schemas"]["IntegerOutput"];
-      lineart_anime_image_processor: components["schemas"]["ImageOutput"];
-      string_split: components["schemas"]["String2Output"];
-      img_nsfw: components["schemas"]["ImageOutput"];
-      string: components["schemas"]["StringOutput"];
-      mask_edge: components["schemas"]["ImageOutput"];
-      i2l: components["schemas"]["LatentsOutput"];
-      face_identifier: components["schemas"]["ImageOutput"];
-      compel: components["schemas"]["ConditioningOutput"];
-      esrgan: components["schemas"]["ImageOutput"];
-      seamless: components["schemas"]["SeamlessModeOutput"];
-      mask_from_id: components["schemas"]["ImageOutput"];
-      invert_tensor_mask: components["schemas"]["MaskOutput"];
-      rectangle_mask: components["schemas"]["MaskOutput"];
-      conditioning: components["schemas"]["ConditioningOutput"];
-      t2i_adapter: components["schemas"]["T2IAdapterOutput"];
-      string_collection: components["schemas"]["StringCollectionOutput"];
-      show_image: components["schemas"]["ImageOutput"];
-      dw_openpose_image_processor: components["schemas"]["ImageOutput"];
-      string_split_neg: components["schemas"]["StringPosNegOutput"];
-      conditioning_collection: components["schemas"]["ConditioningCollectionOutput"];
-      infill_patchmatch: components["schemas"]["ImageOutput"];
-      img_conv: components["schemas"]["ImageOutput"];
-      unsharp_mask: components["schemas"]["ImageOutput"];
-      metadata_item: components["schemas"]["MetadataItemOutput"];
-      image: components["schemas"]["ImageOutput"];
-      image_collection: components["schemas"]["ImageCollectionOutput"];
-      tile_to_properties: components["schemas"]["TileToPropertiesOutput"];
-      lblend: components["schemas"]["LatentsOutput"];
-      float: components["schemas"]["FloatOutput"];
-      boolean_collection: components["schemas"]["BooleanCollectionOutput"];
-      color: components["schemas"]["ColorOutput"];
      midas_depth_image_processor: components["schemas"]["ImageOutput"];
-      zoe_depth_image_processor: components["schemas"]["ImageOutput"];
-      infill_rgba: components["schemas"]["ImageOutput"];
-      mlsd_image_processor: components["schemas"]["ImageOutput"];
+      lscale: components["schemas"]["LatentsOutput"];
+      string_split: components["schemas"]["String2Output"];
+      mask_edge: components["schemas"]["ImageOutput"];
+      content_shuffle_image_processor: components["schemas"]["ImageOutput"];
+      color_correct: components["schemas"]["ImageOutput"];
+      save_image: components["schemas"]["ImageOutput"];
+      show_image: components["schemas"]["ImageOutput"];
+      segment_anything_processor: components["schemas"]["ImageOutput"];
+      latents: components["schemas"]["LatentsOutput"];
+      lineart_image_processor: components["schemas"]["ImageOutput"];
+      hed_image_processor: components["schemas"]["ImageOutput"];
+      infill_lama: components["schemas"]["ImageOutput"];
+      infill_patchmatch: components["schemas"]["ImageOutput"];
+      float_collection: components["schemas"]["FloatCollectionOutput"];
+      denoise_latents: components["schemas"]["LatentsOutput"];
+      metadata: components["schemas"]["MetadataOutput"];
+      compel: components["schemas"]["ConditioningOutput"];
+      img_blur: components["schemas"]["ImageOutput"];
+      img_crop: components["schemas"]["ImageOutput"];
+      sdxl_lora_collection_loader: components["schemas"]["SDXLLoRALoaderOutput"];
+      img_ilerp: components["schemas"]["ImageOutput"];
+      img_paste: components["schemas"]["ImageOutput"];
+      core_metadata: components["schemas"]["MetadataOutput"];
+      lora_collection_loader: components["schemas"]["LoRALoaderOutput"];
+      lora_selector: components["schemas"]["LoRASelectorOutput"];
+      create_denoise_mask: components["schemas"]["DenoiseMaskOutput"];
+      rectangle_mask: components["schemas"]["MaskOutput"];
+      noise: components["schemas"]["NoiseOutput"];
+      float_to_int: components["schemas"]["IntegerOutput"];
+      esrgan: components["schemas"]["ImageOutput"];
      merge_tiles_to_image: components["schemas"]["ImageOutput"];
      prompt_from_file: components["schemas"]["StringCollectionOutput"];
-      boolean: components["schemas"]["BooleanOutput"];
-      create_gradient_mask: components["schemas"]["GradientMaskOutput"];
-      rand_float: components["schemas"]["FloatOutput"];
-      img_mul: components["schemas"]["ImageOutput"];
-      controlnet: components["schemas"]["ControlOutput"];
-      latents_collection: components["schemas"]["LatentsCollectionOutput"];
-      img_lerp: components["schemas"]["ImageOutput"];
-      noise: components["schemas"]["NoiseOutput"];
-      iterate: components["schemas"]["IterateInvocationOutput"];
-      lineart_image_processor: components["schemas"]["ImageOutput"];
-      tomask: components["schemas"]["ImageOutput"];
-      integer: components["schemas"]["IntegerOutput"];
-      create_denoise_mask: components["schemas"]["DenoiseMaskOutput"];
-      clip_skip: components["schemas"]["CLIPSkipInvocationOutput"];
-      denoise_latents: components["schemas"]["LatentsOutput"];
-      string_join: components["schemas"]["StringOutput"];
-      scheduler: components["schemas"]["SchedulerOutput"];
-      model_identifier: components["schemas"]["ModelIdentifierOutput"];
-      normalbae_image_processor: components["schemas"]["ImageOutput"];
-      face_off: components["schemas"]["FaceOffOutput"];
-      hed_image_processor: components["schemas"]["ImageOutput"];
-      img_paste: components["schemas"]["ImageOutput"];
-      img_chan: components["schemas"]["ImageOutput"];
-      img_watermark: components["schemas"]["ImageOutput"];
-      l2i: components["schemas"]["ImageOutput"];
-      string_replace: components["schemas"]["StringOutput"];
-      color_map_image_processor: components["schemas"]["ImageOutput"];
-      tile_image_processor: components["schemas"]["ImageOutput"];
-      crop_latents: components["schemas"]["LatentsOutput"];
-      sdxl_lora_collection_loader: components["schemas"]["SDXLLoRALoaderOutput"];
-      add: components["schemas"]["IntegerOutput"];
-      sub: components["schemas"]["IntegerOutput"];
-      img_scale: components["schemas"]["ImageOutput"];
-      range: components["schemas"]["IntegerCollectionOutput"];
-      dynamic_prompt: components["schemas"]["StringCollectionOutput"];
-      img_crop: components["schemas"]["ImageOutput"];
-      infill_tile: components["schemas"]["ImageOutput"];
-      img_resize: components["schemas"]["ImageOutput"];
-      mediapipe_face_processor: components["schemas"]["ImageOutput"];
-      sdxl_model_loader: components["schemas"]["SDXLModelLoaderOutput"];
-      lora_selector: components["schemas"]["LoRASelectorOutput"];
-      img_hue_adjust: components["schemas"]["ImageOutput"];
-      latents: components["schemas"]["LatentsOutput"];
-      lora_collection_loader: components["schemas"]["LoRALoaderOutput"];
-      img_blur: components["schemas"]["ImageOutput"];
-      ideal_size: components["schemas"]["IdealSizeOutput"];
-      float_collection: components["schemas"]["FloatCollectionOutput"];
-      blank_image: components["schemas"]["ImageOutput"];
-      integer_math: components["schemas"]["IntegerOutput"];
-      lora_loader: components["schemas"]["LoRALoaderOutput"];
-      metadata: components["schemas"]["MetadataOutput"];
+      infill_rgba: components["schemas"]["ImageOutput"];
      sdxl_lora_loader: components["schemas"]["SDXLLoRALoaderOutput"];
-      round_float: components["schemas"]["FloatOutput"];
-      sdxl_refiner_model_loader: components["schemas"]["SDXLRefinerModelLoaderOutput"];
-      mul: components["schemas"]["IntegerOutput"];
-      alpha_mask_to_tensor: components["schemas"]["MaskOutput"];
-      lscale: components["schemas"]["LatentsOutput"];
-      save_image: components["schemas"]["ImageOutput"];
+      lora_loader: components["schemas"]["LoRALoaderOutput"];
+      iterate: components["schemas"]["IterateInvocationOutput"];
+      t2i_adapter: components["schemas"]["T2IAdapterOutput"];
+      color_map_image_processor: components["schemas"]["ImageOutput"];
+      blank_image: components["schemas"]["ImageOutput"];
+      normalbae_image_processor: components["schemas"]["ImageOutput"];
+      canvas_paste_back: components["schemas"]["ImageOutput"];
+      string_split_neg: components["schemas"]["StringPosNegOutput"];
+      img_channel_offset: components["schemas"]["ImageOutput"];
+      face_mask_detection: components["schemas"]["FaceMaskOutput"];
+      cv_inpaint: components["schemas"]["ImageOutput"];
+      clip_skip: components["schemas"]["CLIPSkipInvocationOutput"];
+      invert_tensor_mask: components["schemas"]["MaskOutput"];
+      tomask: components["schemas"]["ImageOutput"];
+      main_model_loader: components["schemas"]["ModelLoaderOutput"];
+      img_watermark: components["schemas"]["ImageOutput"];
+      img_pad_crop: components["schemas"]["ImageOutput"];
+      random_range: components["schemas"]["IntegerCollectionOutput"];
+      mlsd_image_processor: components["schemas"]["ImageOutput"];
+      merge_metadata: components["schemas"]["MetadataOutput"];
+      string_join: components["schemas"]["StringOutput"];
+      vae_loader: components["schemas"]["VAEOutput"];
+      calculate_image_tiles_even_split: components["schemas"]["CalculateImageTilesOutput"];
      calculate_image_tiles_min_overlap: components["schemas"]["CalculateImageTilesOutput"];
+      mask_from_id: components["schemas"]["ImageOutput"];
+      zoe_depth_image_processor: components["schemas"]["ImageOutput"];
+      img_resize: components["schemas"]["ImageOutput"];
+      string_replace: components["schemas"]["StringOutput"];
+      face_identifier: components["schemas"]["ImageOutput"];
+      canny_image_processor: components["schemas"]["ImageOutput"];
+      collect: components["schemas"]["CollectInvocationOutput"];
+      infill_tile: components["schemas"]["ImageOutput"];
+      integer_collection: components["schemas"]["IntegerCollectionOutput"];
+      img_lerp: components["schemas"]["ImageOutput"];
+      step_param_easing: components["schemas"]["FloatCollectionOutput"];
+      lresize: components["schemas"]["LatentsOutput"];
+      img_mul: components["schemas"]["ImageOutput"];
+      create_gradient_mask: components["schemas"]["GradientMaskOutput"];
+      img_scale: components["schemas"]["ImageOutput"];
+      rand_float: components["schemas"]["FloatOutput"];
+      tile_to_properties: components["schemas"]["TileToPropertiesOutput"];
+      calculate_image_tiles: components["schemas"]["CalculateImageTilesOutput"];
+      range_of_size: components["schemas"]["IntegerCollectionOutput"];
+      sdxl_refiner_model_loader: components["schemas"]["SDXLRefinerModelLoaderOutput"];
+      heuristic_resize: components["schemas"]["ImageOutput"];
+      controlnet: components["schemas"]["ControlOutput"];
+      string: components["schemas"]["StringOutput"];
+      tile_image_processor: components["schemas"]["ImageOutput"];
+      metadata_item: components["schemas"]["MetadataItemOutput"];
+      freeu: components["schemas"]["UNetOutput"];
+      round_float: components["schemas"]["FloatOutput"];
+      conditioning: components["schemas"]["ConditioningOutput"];
+      ideal_size: components["schemas"]["IdealSizeOutput"];
+      float: components["schemas"]["FloatOutput"];
+      conditioning_collection: components["schemas"]["ConditioningCollectionOutput"];
+      alpha_mask_to_tensor: components["schemas"]["MaskOutput"];
+      integer_math: components["schemas"]["IntegerOutput"];
+      string_collection: components["schemas"]["StringCollectionOutput"];
+      img_conv: components["schemas"]["ImageOutput"];
+      img_channel_multiply: components["schemas"]["ImageOutput"];
+      lblend: components["schemas"]["LatentsOutput"];
+      color: components["schemas"]["ColorOutput"];
+      image: components["schemas"]["ImageOutput"];
+      sdxl_model_loader: components["schemas"]["SDXLModelLoaderOutput"];
+      image_collection: components["schemas"]["ImageCollectionOutput"];
+      model_identifier: components["schemas"]["ModelIdentifierOutput"];
+      l2i: components["schemas"]["ImageOutput"];
+      seamless: components["schemas"]["SeamlessModeOutput"];
+      boolean_collection: components["schemas"]["BooleanCollectionOutput"];
+      string_join_three: components["schemas"]["StringOutput"];
+      ip_adapter: components["schemas"]["IPAdapterOutput"];
+      add: components["schemas"]["IntegerOutput"];
+      crop_latents: components["schemas"]["LatentsOutput"];
+      float_range: components["schemas"]["FloatCollectionOutput"];
+      mul: components["schemas"]["IntegerOutput"];
+      dw_openpose_image_processor: components["schemas"]["ImageOutput"];
+      boolean: components["schemas"]["BooleanOutput"];
+      dynamic_prompt: components["schemas"]["StringCollectionOutput"];
+      mediapipe_face_processor: components["schemas"]["ImageOutput"];
+      i2l: components["schemas"]["LatentsOutput"];
+      latents_collection: components["schemas"]["LatentsCollectionOutput"];
+      integer: components["schemas"]["IntegerOutput"];
+      img_chan: components["schemas"]["ImageOutput"];
+      pair_tile_image: components["schemas"]["PairTileImageOutput"];
+      unsharp_mask: components["schemas"]["ImageOutput"];
+      img_hue_adjust: components["schemas"]["ImageOutput"];
+      lineart_anime_image_processor: components["schemas"]["ImageOutput"];
+      face_off: components["schemas"]["FaceOffOutput"];
+      mask_combine: components["schemas"]["ImageOutput"];
+      leres_image_processor: components["schemas"]["ImageOutput"];
+      image_mask_to_tensor: components["schemas"]["MaskOutput"];
+      sdxl_refiner_compel_prompt: components["schemas"]["ConditioningOutput"];
+      scheduler: components["schemas"]["SchedulerOutput"];
+      sub: components["schemas"]["IntegerOutput"];
+      pidi_image_processor: components["schemas"]["ImageOutput"];
+      infill_cv2: components["schemas"]["ImageOutput"];
+      div: components["schemas"]["IntegerOutput"];
+      img_nsfw: components["schemas"]["ImageOutput"];
+      depth_anything_image_processor: components["schemas"]["ImageOutput"];
+      sdxl_compel_prompt: components["schemas"]["ConditioningOutput"];
+      range: components["schemas"]["IntegerCollectionOutput"];
+      rand_int: components["schemas"]["IntegerOutput"];
+      float_math: components["schemas"]["FloatOutput"];
    };
    /**
     * InvocationStartedEvent
@@ -9443,6 +9450,49 @@ export type components = {
          [key: string]: number | string;
        })[];
    };
+    /**
+     * ModelInstallDownloadStartedEvent
+     * @description Event model for model_install_download_started
+     */
+    ModelInstallDownloadStartedEvent: {
+      /**
+       * Timestamp
+       * @description The timestamp of the event
+       */
+      timestamp: number;
+      /**
+       * Id
+       * @description The ID of the install job
+       */
+      id: number;
+      /**
+       * Source
+       * @description Source of the model; local path, repo_id or url
+       */
+      source: string;
+      /**
+       * Local Path
+       * @description Where model is downloading to
+       */
+      local_path: string;
+      /**
+       * Bytes
+       * @description Number of bytes downloaded so far
+       */
+      bytes: number;
+      /**
+       * Total Bytes
+       * @description Total size of download, including all files
+       */
+      total_bytes: number;
+      /**
+       * Parts
+       * @description Progress of downloading URLs that comprise the model, if any
+       */
+      parts: ({
+          [key: string]: number | string;
+        })[];
+    };
    /**
     * ModelInstallDownloadsCompleteEvent
     * @description Emitted once when an install job becomes active.
@@ -10671,8 +10721,9 @@ export type components = {
      /**
       * Size
       * @description The size of this file, in bytes
+       * @default 0
       */
-      size: number;
+      size?: number | null;
      /**
       * Sha256
       * @description SHA256 hash of this model (not always available)
@@ -14050,6 +14101,40 @@ export type operations = {
      };
    };
  };
+  /**
+   * Install Hugging Face Model
+   * @description Install a Hugging Face model using a string identifier.
+   */
+  install_hugging_face_model: {
+    parameters: {
+      query: {
+        /** @description Hugging Face repo_id to install */
+        source: string;
+      };
+    };
+    responses: {
+      /** @description The model is being installed */
+      201: {
+        content: {
+          "text/html": string;
+        };
+      };
+      /** @description Bad request */
+      400: {
+        content: never;
+      };
+      /** @description There is already a model corresponding to this path or repo_id */
+      409: {
+        content: never;
+      };
+      /** @description Validation Error */
+      422: {
+        content: {
+          "application/json": components["schemas"]["HTTPValidationError"];
+        };
+      };
+    };
+  };
  /**
   * Get Model Install Job
   * @description Return model install job corresponding to the given source. See the documentation for 'List Model Install Jobs'
--- a/invokeai/frontend/web/src/services/events/actions.ts
+++ b/invokeai/frontend/web/src/services/events/actions.ts
@@ -16,6 +16,7 @@ import type {
  ModelInstallCompleteEvent,
  ModelInstallDownloadProgressEvent,
  ModelInstallDownloadsCompleteEvent,
+  ModelInstallDownloadStartedEvent,
  ModelInstallErrorEvent,
  ModelInstallStartedEvent,
  ModelLoadCompleteEvent,
@@ -45,6 +46,9 @@ export const socketModelInstallStarted = createSocketAction<ModelInstallStartedE
 export const socketModelInstallDownloadProgress = createSocketAction<ModelInstallDownloadProgressEvent>(
  'ModelInstallDownloadProgressEvent'
 );
+export const socketModelInstallDownloadStarted = createSocketAction<ModelInstallDownloadStartedEvent>(
+  'ModelInstallDownloadStartedEvent'
+);
 export const socketModelInstallDownloadsComplete = createSocketAction<ModelInstallDownloadsCompleteEvent>(
  'ModelInstallDownloadsCompleteEvent'
 );
--- a/invokeai/frontend/web/src/services/events/types.ts
+++ b/invokeai/frontend/web/src/services/events/types.ts
@@ -9,6 +9,7 @@ export type InvocationCompleteEvent = S['InvocationCompleteEvent'];
 export type InvocationErrorEvent = S['InvocationErrorEvent'];
 export type ProgressImage = InvocationDenoiseProgressEvent['progress_image'];

+export type ModelInstallDownloadStartedEvent = S['ModelInstallDownloadStartedEvent'];
 export type ModelInstallDownloadProgressEvent = S['ModelInstallDownloadProgressEvent'];
 export type ModelInstallDownloadsCompleteEvent = S['ModelInstallDownloadsCompleteEvent'];
 export type ModelInstallCompleteEvent = S['ModelInstallCompleteEvent'];
@@ -49,6 +50,7 @@ export type ServerToClientEvents = {
  download_error: (payload: DownloadErrorEvent) => void;
  model_load_started: (payload: ModelLoadStartedEvent) => void;
  model_install_started: (payload: ModelInstallStartedEvent) => void;
+  model_install_download_started: (payload: ModelInstallDownloadStartedEvent) => void;
  model_install_download_progress: (payload: ModelInstallDownloadProgressEvent) => void;
  model_install_downloads_complete: (payload: ModelInstallDownloadsCompleteEvent) => void;
  model_install_complete: (payload: ModelInstallCompleteEvent) => void;
--- a/invokeai/invocation_api/init.py
+++ b/invokeai/invocation_api/init.py
@@ -31,7 +31,6 @@ from invokeai.app.invocations.fields import (
    WithMetadata,
    WithWorkflow,
 )
-from invokeai.app.invocations.latent import SchedulerOutput
 from invokeai.app.invocations.metadata import MetadataItemField, MetadataItemOutput, MetadataOutput
 from invokeai.app.invocations.model import (
    CLIPField,
@@ -64,6 +63,7 @@ from invokeai.app.invocations.primitives import (
    StringCollectionOutput,
    StringOutput,
 )
+from invokeai.app.invocations.scheduler import SchedulerOutput
 from invokeai.app.services.boards.boards_common import BoardDTO
 from invokeai.app.services.config.config_default import InvokeAIAppConfig
 from invokeai.app.services.image_records.image_records_common import ImageCategory
@@ -108,7 +108,7 @@ __all__ = [
    "WithBoard",
    "WithMetadata",
    "WithWorkflow",
-    # invokeai.app.invocations.latent
+    # invokeai.app.invocations.scheduler
    "SchedulerOutput",
    # invokeai.app.invocations.metadata
    "MetadataItemField",
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -224,7 +224,7 @@ follow_imports = "skip" # skips type checking of the modules listed below
 module = [
  "invokeai.app.api.routers.models",
  "invokeai.app.invocations.compel",
-  "invokeai.app.invocations.latent",
+  "invokeai.app.invocations.denoise_latents",
  "invokeai.app.services.invocation_stats.invocation_stats_default",
  "invokeai.app.services.model_manager.model_manager_base",
  "invokeai.app.services.model_manager.model_manager_default",
--- a/tests/app/services/model_install/test_model_install.py
+++ b/tests/app/services/model_install/test_model_install.py
@@ -17,6 +17,7 @@ from invokeai.app.services.events.events_common import (
    ModelInstallCompleteEvent,
    ModelInstallDownloadProgressEvent,
    ModelInstallDownloadsCompleteEvent,
+    ModelInstallDownloadStartedEvent,
    ModelInstallStartedEvent,
 )
 from invokeai.app.services.model_install import (
@@ -252,7 +253,7 @@ def test_simple_download(mm2_installer: ModelInstallServiceBase, mm2_app_config:
    assert (mm2_app_config.models_path / model_record.path).exists()

    assert len(bus.events) == 5
-    assert isinstance(bus.events[0], ModelInstallDownloadProgressEvent)  # download starts
+    assert isinstance(bus.events[0], ModelInstallDownloadStartedEvent)  # download starts
    assert isinstance(bus.events[1], ModelInstallDownloadProgressEvent)  # download progresses
    assert isinstance(bus.events[2], ModelInstallDownloadsCompleteEvent)  # download completed
    assert isinstance(bus.events[3], ModelInstallStartedEvent)  # install started
Author	SHA1	Message	Date
Ryan Dick	6bcf48aa37	WIP - Started working towards MultiDiffusion batching.	2024-06-18 15:44:39 -04:00
Ryan Dick	b1bb1511fe	Delete rough notes.	2024-06-18 15:36:36 -04:00
Ryan Dick	99046a8145	Fix advanced scheduler behaviour in MultiDiffusionPipeline.	2024-06-18 15:36:36 -04:00
Ryan Dick	72be7e71e3	Fix handling of stateful schedulers in MultiDiffusionPipeline.	2024-06-18 15:36:36 -04:00
Ryan Dick	35adaf1c17	Connect TiledMultiDiffusionDenoiseLatents to the MultiDiffusionPipeline backend.	2024-06-18 15:36:34 -04:00
Ryan Dick	865c2335de	Remove regional conditioning logic from MultiDiffusionPipeline - it is not yet supported.	2024-06-18 15:35:52 -04:00
Ryan Dick	49ca42f84a	Initial (untested) implementation of MultiDiffusionPipeline.	2024-06-18 15:35:52 -04:00
Ryan Dick	493fcd8660	Remove inpainting support from MultiDiffusionPipeline.	2024-06-18 15:35:52 -04:00
Ryan Dick	20322d781e	Remove IP-Adapter and T2I-Adapter support from MultiDiffusionPipeline.	2024-06-18 15:35:52 -04:00
Ryan Dick	889d13e02a	Document plan for the rest of the MultiDiffusion implementation.	2024-06-18 15:35:52 -04:00
Ryan Dick	6ccd2a867b	Add detailed docstring to latents_from_embeddings().	2024-06-18 15:35:52 -04:00
Ryan Dick	5861fa1719	Copy StableDiffusionGeneratorPipeline as a starting point for a new MultiDiffusionPipeline.	2024-06-18 15:35:52 -04:00
Ryan Dick	dfd4beb62b	Simplify handling of inpainting models. Improve the in-code documentation around inpainting.	2024-06-18 15:35:52 -04:00
Ryan Dick	83df0c0df5	Minor tidying of latents_from_embeddings(...).	2024-06-18 15:35:52 -04:00
Ryan Dick	c58c4069a7	Consolidate latents_from_embeddings(...) and generate_latents_from_embeddings(...) into a single function.	2024-06-18 15:35:52 -04:00
Ryan Dick	3937fffa94	Fix invocation name of tiled_multi_diffusion_denoise_latents.	2024-06-18 15:35:52 -04:00
Ryan Dick	bbf5f67691	Improve clarity of comments regarding when 'noise' and 'latents' are expected to be set.	2024-06-18 15:35:52 -04:00
Ryan Dick	2f5c147b84	Fix static check errors on imports in diffusers_pipeline.py.	2024-06-18 15:35:52 -04:00
Ryan Dick	bd2839b748	Remove a condition for handling inpainting models that never resolves to True. The same logic is already applied earlier by AddsMaskLatents.	2024-06-18 15:35:52 -04:00
Ryan Dick	4f70dd7ce1	Add clarifying comment to explain why noise might be None in latents_from_embedding().	2024-06-18 15:35:52 -04:00
Ryan Dick	066672fbfd	Remove unused are_like_tensors() function.	2024-06-18 15:35:52 -04:00
Ryan Dick	abefaee4d1	Remove unused StableDiffusionGeneratorPipeline.use_ip_adapter member.	2024-06-18 15:35:52 -04:00
Ryan Dick	3254ba5904	Remove unused StableDiffusionGeneratorPipeline.control_model.	2024-06-18 15:35:52 -04:00
Ryan Dick	73a8c55852	Stricter typing for the is_gradient_mask: bool.	2024-06-18 15:35:52 -04:00
Ryan Dick	f82af7c22d	Fix typing of control_data to reflect that it can be None.	2024-06-18 15:35:52 -04:00
Ryan Dick	3aef717ef4	Fix typing of timesteps and init_timestep.	2024-06-18 15:35:52 -04:00
Ryan Dick	c2cf1137e9	Fix typing to reflect that the callback arg to latents_from_embeddings is never None.	2024-06-18 15:35:52 -04:00
Ryan Dick	803a24bc0a	Move seed above optional params.	2024-06-18 15:35:52 -04:00
Ryan Dick	7d24ad8ccd	Simplify handling of AddsMaskGuidance, and fix some related type errors.	2024-06-18 15:35:52 -04:00
Ryan Dick	cb389063b2	Remove unused num_inference_steps.	2024-06-18 15:35:52 -04:00
Ryan Dick	81b8a69e1a	WIP TiledMultiDiffusionDenoiseLatents. Updated parameter list and first half of the logic.	2024-06-18 15:35:50 -04:00
Ryan Dick	7ee5db87ad	Tidy DenoiseLatentsInvocation.prep_control_data(...) and fix some type errors.	2024-06-18 15:34:30 -04:00
Ryan Dick	66cf2c59bd	Make DenoiseLatentsInvocation.prep_control_data(...) a staticmethod so that it can be called externally.	2024-06-18 15:34:30 -04:00
Ryan Dick	3bad1367e9	Copy TiledStableDiffusionRefineInvocation as a starting point for TiledMultiDiffusionDenoiseLatents.py	2024-06-18 15:34:22 -04:00
Ryan Dick	867a7642a6	Change tiling strategy to make TiledStableDiffusionRefineInvocation work with more tile shapes and overlaps.	2024-06-18 15:31:58 -04:00
Ryan Dick	d9d1c8f9cb	Expose a few more params from TiledStableDiffusionRefineInvocation.	2024-06-18 15:31:58 -04:00
Ryan Dick	e03eb7fb45	Add support for LoRA models in TiledStableDiffusionRefineInvocation.	2024-06-18 15:31:58 -04:00
Ryan Dick	85db33bc7e	Add naive ControlNet support to TiledStableDiffusionRefineInvocation	2024-06-18 15:31:58 -04:00
Ryan Dick	93e3a2b504	Fix ControlNetModel type hint import source.	2024-06-18 15:31:58 -04:00
Ryan Dick	6a7a26f1bf	Rough prototype of TiledStableDiffusionRefineInvocation is working.	2024-06-18 15:31:58 -04:00
Ryan Dick	08ca03ef9f	WIP - TiledStableDiffusionRefine	2024-06-18 15:31:54 -04:00
Ryan Dick	ccf90b6bd6	Minor improvements to LatentsToImageInvocation type hints.	2024-06-18 15:31:21 -04:00
Ryan Dick	753239b48d	Expose vae_decode(...) as a staticmethod on LatentsToImageInvocation.	2024-06-18 15:31:21 -04:00
Ryan Dick	65fa4664c9	Fix return type of prepare_noise_and_latents(...).	2024-06-18 15:31:21 -04:00
Ryan Dick	297570ded3	Make init_scheduler() a staticmethod on DenoiseLatentsInvocation so that it can be called externally.	2024-06-18 15:31:21 -04:00
Ryan Dick	680fdcf293	Only allow a single positive/negative prompt conditioning input for tiled refine.	2024-06-18 15:31:21 -04:00
Ryan Dick	5ff91f2c44	WIP on TiledStableDiffusionRefine	2024-06-18 15:31:14 -04:00
Ryan Dick	69aa7057e7	Convert several methods in DenoiseLatentsInvocation to staticmethods so that they can be called externally.	2024-06-18 15:25:08 -04:00
Ryan Dick	d3932f40de	Simplify the logic in prepare_noise_and_latents(...).	2024-06-18 15:25:08 -04:00
Ryan Dick	ee74cd7fab	Split out the prepare_noise_and_latents(...) logic in DenoiseLatentsInvocation so that it can be called from other invocations.	2024-06-18 15:25:08 -04:00
Ryan Dick	bda25b40c9	(minor) Add a TODO note to get_scheduler(...).	2024-06-18 15:25:08 -04:00
Ryan Dick	7e9a89f8c6	Tidy `SilenceWarnings` context manager (#6493 ) ## Summary No functional changes, just cleaning some things up as I touch the code. This PR cleans up the `SilenceWarnings` context manager: - Fix type errors - Enable SilenceWarnings to be used as both a context manager and a decorator - Remove duplicate implementation - Check the initial verbosity on `__enter__()` rather than `__init__()` - Save an indentation level in DenoiseLatents ## QA Instructions I generated an image to confirm that warnings are still muted. ## Merge Plan - [x] ⚠️ Merge https://github.com/invoke-ai/InvokeAI/pull/6492 first, then change the target branch to `main`. ## Checklist - [x] _The PR has a short but descriptive title, suitable for a changelog_ - [x] _Tests added / updated (if applicable)_ - [x] _Documentation added / updated (if applicable)_	2024-06-18 15:23:32 -04:00
Ryan Dick	79ceac2f82	(minor) Use SilenceWarnings as a decorator rather than a context manager to save an indentation level.	2024-06-18 15:06:22 -04:00
Ryan Dick	8e47e005a7	Tidy SilenceWarnings context manager: - Fix type errors - Enable SilenceWarnings to be used as both a context manager and a decorator - Remove duplicate implementation - Check the initial verbosity on __enter__() rather than __init__()	2024-06-18 15:06:22 -04:00
Ryan Dick	d13aafb514	Tidy denoise_latents.py imports to all use absolute import paths.	2024-06-18 15:06:22 -04:00
Brandon Rising	63a7e19dbf	Run ruff	2024-06-18 10:38:29 -04:00
Brandon Rising	fbc5a8ec65	Ignore validation on improperly formatted hashes (pytest)	2024-06-18 10:38:29 -04:00
Brandon Rising	8ce6e4540e	Run ruff	2024-06-18 10:38:29 -04:00
Brandon Rising	f14f377ede	Update validator list	2024-06-18 10:38:29 -04:00
Brandon Rising	1925f83f5e	Update validator list	2024-06-18 10:38:29 -04:00
Brandon Rising	3a5ad6d112	Update validator list	2024-06-18 10:38:29 -04:00
Brandon Rising	41a6bb45f3	Initial functionality	2024-06-18 10:38:29 -04:00
chainchompa	70e40fa6c1	added route to install huggingface models from model marketplace (#6515 ) ## Summary added route to install huggingface models from model marketplace <!--A description of the changes in this PR. Include the kind of change (fix, feature, docs, etc), the "why" and the "how". Screenshots or videos are useful for frontend changes.--> ## Related Issues / Discussions <!--WHEN APPLICABLE: List any related issues or discussions on github or discord. If this PR closes an issue, please use the "Closes #1234" format, so that the issue will be automatically closed when the PR merges.--> ## QA Instructions test by going to http://localhost:5173/api/v2/models/install/huggingface?source=${hfRepo} <!--WHEN APPLICABLE: Describe how we can test the changes in this PR.--> ## Merge Plan <!--WHEN APPLICABLE: Large PRs, or PRs that touch sensitive things like DB schemas, may need some care when merging. For example, a careful rebase by the change author, timing to not interfere with a pending release, or a message to contributors on discord after merging.--> ## Checklist - [ ] _The PR has a short but descriptive title, suitable for a changelog_ - [ ] _Tests added / updated (if applicable)_ - [ ] _Documentation added / updated (if applicable)_	2024-06-16 21:13:58 -04:00
psychedelicious	e26125b734	tests: fix test_model_install.py	2024-06-17 10:57:11 +10:00
psychedelicious	cd70937b7f	feat(api): improved model install confirmation page styling & messaging	2024-06-17 10:51:08 +10:00
psychedelicious	f002bca2fa	feat(ui): handle new `model_install_download_started` event When a model install is initiated from outside the client, we now trigger the model manager tab's model install list to update. - Handle new `model_install_download_started` event - Handle `model_install_download_complete` event (this event is not new but was never handled) - Update optimistic updates/cache invalidation logic to efficiently update the model install list	2024-06-17 10:07:10 +10:00
psychedelicious	56771de856	feat(ui): add redux actions for `model_install_download_started` event	2024-06-17 09:52:46 +10:00
psychedelicious	c11478a94a	chore(ui): typegen	2024-06-17 09:51:18 +10:00
psychedelicious	fb694b3e17	feat(app): add `model_install_download_started` event Previously, we used `model_install_download_progress` for both download starting and progressing. When handling this event, we don't know which actual thing it represents. Add `model_install_download_started` event to explicitly represent a model download started event.	2024-06-17 09:50:25 +10:00
psychedelicious	1bc98abc76	docs(ui): explain model install events	2024-06-17 09:33:46 +10:00
chainchompa	7f03b04b2f	Merge branch 'main' into chainchompa/model-install-deeplink	2024-06-14 17:16:25 -04:00
chainchompa	4029972530	formatting	2024-06-14 17:15:55 -04:00
chainchompa	328f160e88	refetch model installs when a new model install starts	2024-06-14 17:09:07 -04:00
chainchompa	aae318425d	added route for installing huggingface model from model marketplace	2024-06-14 17:08:39 -04:00
Ryan Dick	785bb1d9e4	Fix all comparisons against the DEFAULT_PRECISION constant. DEFAULT_PRECISION is a torch.dtype. Previously, it was compared to a str in a number of places where it would always resolve to False. This is a bugfix that results in a change to the default behavior. In practice, this will not change the behavior for many users, because it only causes a change in behavior if a user has configured float32 as their default precision.	2024-06-14 11:26:10 -07:00
Lincoln Stein	a3cb5da130	Improve RAM<->VRAM memory copy performance in LoRA patching and elsewhere (#6490 ) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes requested during penultimate review * add non-blocking=True parameters to several torch.nn.Module.to() calls, for slight performance increases * fix ruff errors * prevent crash on non-cuda-enabled systems --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com>	2024-06-13 17:10:03 +00:00
blessedcoolant	568a4844f7	fix: other recursive imports	2024-06-10 04:12:20 -07:00
blessedcoolant	b1e56e2485	fix: SchedulerOutput not being imported correctly	2024-06-10 04:12:20 -07:00
Kent Keirsey	9432336e2b	Add simplified model manager install API to InvocationContext (#6132 ) ## Summary This adds three model manager-related methods to the InvocationContext uniform API. They are accessible via `context.models.`: 1. `load_local_model(model_path: Path, loader: Optional[Callable[[Path], AnyModel]] = None) -> LoadedModelWithoutConfig`* Load the model located at the indicated path. This will load a local model (.safetensors, .ckpt or diffusers directory) into the model manager RAM cache and return its `LoadedModelWithoutConfig`. If the optional loader argument is provided, the loader will be invoked to load the model into memory. Otherwise the method will call `safetensors.torch.load_file()`, `torch.load()` (with a pickle scan), or `from_pretrained()` as appropriate to the path type. Be aware that the `LoadedModelWithoutConfig` object differs from `LoadedModel` by having no `config` attribute. Here is an example of usage: ``` def invoke(self, context: InvocationContext) -> ImageOutput: model_path = Path('/opt/models/RealESRGAN_x4plus.pth') loadnet = context.models.load_local_model(model_path) with loadnet as loadnet_model: upscaler = RealESRGAN(loadnet=loadnet_model,...) ``` --- 2. `load_remote_model(source: str \| AnyHttpUrl, loader: Optional[Callable[[Path], AnyModel]] = None) -> LoadedModelWithoutConfig` Load the model located at the indicated URL or repo_id. This is similar to `load_local_model()` but it accepts either a HuggingFace repo_id (as a string), or a URL. The model's file(s) will be downloaded to `models/.download_cache` and then loaded, returning a `LoadedModelWithoutConfig`. Here is an example of usage: ``` def invoke(self, context: InvocationContext) -> ImageOutput: model_url = 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth' loadnet = context.models.load_remote_model(model_url) with loadnet as loadnet_model: upscaler = RealESRGAN(loadnet=loadnet_model,...) ``` --- 3. 
`download_and_cache_model( source: str \| AnyHttpUrl, access_token: Optional[str] = None, timeout: Optional[int] = 0) -> Path` Download the model file located at source to the models cache and return its Path. This will check `models/.download_cache` for the desired model file and download it from the indicated source if not already present. The local Path to the downloaded file is then returned. --- ## Other Changes This PR performs a migration, in which it renames `models/.cache` to `models/.convert_cache`, and migrates previously-downloaded ESRGAN, openpose, DepthAnything and Lama inpaint models from the `models/core` directory into `models/.download_cache`. There are a number of legacy model files in `models/core`, such as GFPGAN, which are no longer used. This PR deletes them and tidies up the `models/core` directory. ## Related Issues / Discussions I have systematically replaced all the calls to `download_with_progress_bar()`. This function is no longer used elsewhere and has been removed. <!--WHEN APPLICABLE: List any related issues or discussions on github or discord. If this PR closes an issue, please use the "Closes #1234" format, so that the issue will be automatically closed when the PR merges.--> ## QA Instructions I have added unit tests for the three new calls. You may test that the `load_and_cache_model()` call is working by running the upscaler within the web app. On first try, you will see the model file being downloaded into the models `.cache` directory. On subsequent tries, the model will either load from RAM (if it hasn't been displaced) or will be loaded from the filesystem. <!--WHEN APPLICABLE: Describe how we can test the changes in this PR.--> ## Merge Plan Squash merge when approved. <!--WHEN APPLICABLE: Large PRs, or PRs that touch sensitive things like DB schemas, may need some care when merging. 
For example, a careful rebase by the change author, timing to not interfere with a pending release, or a message to contributors on discord after merging.--> ## Checklist - [X] _The PR has a short but descriptive title, suitable for a changelog_ - [X] _Tests added / updated (if applicable)_ - [X] _Documentation added / updated (if applicable)_	2024-06-08 16:24:31 -07:00
Lincoln Stein	7d19af2caa	Merge branch 'main' into lstein/feat/simple-mm2-api	2024-06-08 18:55:06 -04:00
Ryan Dick	0dbec3ad8b	Split up latent.py (code reorganization, no functional changes) (#6491 ) ## Summary I've started working towards a better tiled upscaling implementation. It is going to require some refactoring of `DenoiseLatentsInvocation`. As a first step, this PR splits up all of the invocations in latent.py into their own files. That file had become a bit of a dumping ground - it should be a bit more manageable to work with now. This PR just re-organizes the code. There should be no functional changes. ## QA Instructions I've done some light smoke testing. I'll do some more before merging. The main risk is that I missed a broken import, or some other copy-paste error. ## Checklist - [x] _The PR has a short but descriptive title, suitable for a changelog_ - [x] _Tests added / updated (if applicable)_: N/A - [x] _Documentation added / updated (if applicable)_: N/A	2024-06-07 12:01:56 -04:00
Ryan Dick	52c0c4a32f	Rename latent.py -> denoise_latents.py.	2024-06-07 09:28:42 -04:00
Ryan Dick	8f1afc032a	Move SchedulerInvocation to a new file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	854bca668a	Move CreateDenoiseMaskInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	fea9013cad	Move CreateGradientMaskInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	045caddee1	Move LatentsToImageInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	58697141bf	Move ImageToLatentsInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	5e419dbb56	Move ScaleLatentsInvocation and ResizeLatentsInvocation to their own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	595096bdcf	Move BlendLatentsInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	ed03d281e6	Move CropLatentsCoreInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
Ryan Dick	0b37496c57	Move IdealSizeInvocation to its own file. No functional changes.	2024-06-07 09:28:42 -04:00
psychedelicious	fde58ce0a3	Merge remote-tracking branch 'origin/main' into lstein/feat/simple-mm2-api	2024-06-07 14:23:41 +10:00