Return a MaskOutput from SegmentAnythingModelInvocation, and add a MaskTensorToImageInvocation.

Split invokeai/backend/image_util/segment_anything/ dir into grounding_dino/ and segment_anything/
Split GroundedSamInvocation into GroundingDinoInvocation and SegmentAnythingModelInvocation.
2026-01-18 15:48:04 -05:00 · 2024-07-31 17:16:14 -04:00 · 2024-07-31 12:28:47 -04:00 · 2024-07-31 12:20:23 -04:00 · 2024-07-31 10:25:34 -04:00 · 2024-07-31 10:00:30 -04:00
215 changed files with 8119 additions and 2957 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -55,6 +55,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
 ENV PATH="$PNPM_HOME:$PATH"
+RUN corepack use pnpm@8.x
 RUN corepack enable

 WORKDIR /build
--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -6,7 +6,7 @@ import pathlib
 import traceback
 from copy import deepcopy
 from tempfile import TemporaryDirectory
-from typing import Any, Dict, List, Optional, Type
+from typing import List, Optional, Type

 from fastapi import Body, Path, Query, Response, UploadFile
 from fastapi.responses import FileResponse, HTMLResponse
@@ -430,13 +430,11 @@ async def delete_model_image(
 async def install_model(
    source: str = Query(description="Model source to install, can be a local path, repo_id, or remote URL"),
    inplace: Optional[bool] = Query(description="Whether or not to install a local model in place", default=False),
-    # TODO(MM2): Can we type this?
-    config: Optional[Dict[str, Any]] = Body(
-        description="Dict of fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
-        default=None,
+    access_token: Optional[str] = Query(description="access token for the remote resource", default=None),
+    config: ModelRecordChanges = Body(
+        description="Object containing fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
        example={"name": "string", "description": "string"},
    ),
-    access_token: Optional[str] = None,
 ) -> ModelInstallJob:
    """Install a model using a string identifier.

@@ -451,8 +449,9 @@ async def install_model(
       - model/name:fp16:path/to/model.safetensors
       - model/name::path/to/model.safetensors

-    `config` is an optional dict containing model configuration values that will override
-    the ones that are probed automatically.
+    `config` is a ModelRecordChanges object. Fields in this object will override
+    the ones that are probed automatically. Pass an empty object to accept
+    all the defaults.

    `access_token` is an optional access token for use with Urls that require
    authentication.
@@ -737,7 +736,7 @@ async def convert_model(
        # write the converted file to the convert path
        raw_model = converted_model.model
        assert hasattr(raw_model, "save_pretrained")
-        raw_model.save_pretrained(convert_path)
+        raw_model.save_pretrained(convert_path)  # type: ignore
        assert convert_path.exists()

        # temporarily rename the original safetensors file so that there is no naming conflict
@@ -750,12 +749,12 @@ async def convert_model(
        try:
            new_key = installer.install_path(
                convert_path,
-                config={
-                    "name": original_name,
-                    "description": model_config.description,
-                    "hash": model_config.hash,
-                    "source": model_config.source,
-                },
+                config=ModelRecordChanges(
+                    name=original_name,
+                    description=model_config.description,
+                    hash=model_config.hash,
+                    source=model_config.source,
+                ),
            )
        except Exception as e:
            logger.error(str(e))
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
 import inspect
+import os
 from contextlib import ExitStack
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

@@ -36,9 +37,10 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
-from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.model_manager import BaseModelType, ModelVariantType
 from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion import PipelineIntermediateState
+from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, DenoiseInputs
 from invokeai.backend.stable_diffusion.diffusers_pipeline import (
    ControlNetData,
    StableDiffusionGeneratorPipeline,
@@ -53,6 +55,18 @@ from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
    TextConditioningData,
    TextConditioningRegions,
 )
+from invokeai.backend.stable_diffusion.diffusion.custom_atttention import CustomAttnProcessor2_0
+from invokeai.backend.stable_diffusion.diffusion_backend import StableDiffusionBackend
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.controlnet import ControlNetExt
+from invokeai.backend.stable_diffusion.extensions.freeu import FreeUExt
+from invokeai.backend.stable_diffusion.extensions.inpaint import InpaintExt
+from invokeai.backend.stable_diffusion.extensions.inpaint_model import InpaintModelExt
+from invokeai.backend.stable_diffusion.extensions.preview import PreviewExt
+from invokeai.backend.stable_diffusion.extensions.rescale_cfg import RescaleCFGExt
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
+from invokeai.backend.stable_diffusion.extensions.t2i_adapter import T2IAdapterExt
+from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
 from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
 from invokeai.backend.util.devices import TorchDevice
@@ -314,9 +328,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
        context: InvocationContext,
        positive_conditioning_field: Union[ConditioningField, list[ConditioningField]],
        negative_conditioning_field: Union[ConditioningField, list[ConditioningField]],
-        unet: UNet2DConditionModel,
        latent_height: int,
        latent_width: int,
+        device: torch.device,
+        dtype: torch.dtype,
        cfg_scale: float | list[float],
        steps: int,
        cfg_rescale_multiplier: float,
@@ -330,10 +345,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
            uncond_list = [uncond_list]

        cond_text_embeddings, cond_text_embedding_masks = DenoiseLatentsInvocation._get_text_embeddings_and_masks(
-            cond_list, context, unet.device, unet.dtype
+            cond_list, context, device, dtype
        )
        uncond_text_embeddings, uncond_text_embedding_masks = DenoiseLatentsInvocation._get_text_embeddings_and_masks(
-            uncond_list, context, unet.device, unet.dtype
+            uncond_list, context, device, dtype
        )

        cond_text_embedding, cond_regions = DenoiseLatentsInvocation._concat_regional_text_embeddings(
@@ -341,14 +356,14 @@ class DenoiseLatentsInvocation(BaseInvocation):
            masks=cond_text_embedding_masks,
            latent_height=latent_height,
            latent_width=latent_width,
-            dtype=unet.dtype,
+            dtype=dtype,
        )
        uncond_text_embedding, uncond_regions = DenoiseLatentsInvocation._concat_regional_text_embeddings(
            text_conditionings=uncond_text_embeddings,
            masks=uncond_text_embedding_masks,
            latent_height=latent_height,
            latent_width=latent_width,
-            dtype=unet.dtype,
+            dtype=dtype,
        )

        if isinstance(cfg_scale, list):
@@ -455,6 +470,65 @@ class DenoiseLatentsInvocation(BaseInvocation):

        return controlnet_data

+    @staticmethod
+    def parse_controlnet_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        control_input: ControlField | list[ControlField] | None,
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        # Normalize control_input to a list.
+        control_list: list[ControlField]
+        if isinstance(control_input, ControlField):
+            control_list = [control_input]
+        elif isinstance(control_input, list):
+            control_list = control_input
+        elif control_input is None:
+            control_list = []
+        else:
+            raise ValueError(f"Unexpected control_input type: {type(control_input)}")
+
+        for control_info in control_list:
+            model = exit_stack.enter_context(context.models.load(control_info.control_model))
+            ext_manager.add_extension(
+                ControlNetExt(
+                    model=model,
+                    image=context.images.get_pil(control_info.image.image_name),
+                    weight=control_info.control_weight,
+                    begin_step_percent=control_info.begin_step_percent,
+                    end_step_percent=control_info.end_step_percent,
+                    control_mode=control_info.control_mode,
+                    resize_mode=control_info.resize_mode,
+                )
+            )
+
+    @staticmethod
+    def parse_t2i_adapter_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        t2i_adapters: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        if t2i_adapters is None:
+            return
+
+        # Handle the possibility that t2i_adapters could be a list or a single T2IAdapterField.
+        if isinstance(t2i_adapters, T2IAdapterField):
+            t2i_adapters = [t2i_adapters]
+
+        for t2i_adapter_field in t2i_adapters:
+            ext_manager.add_extension(
+                T2IAdapterExt(
+                    node_context=context,
+                    model_id=t2i_adapter_field.t2i_adapter_model,
+                    image=context.images.get_pil(t2i_adapter_field.image.image_name),
+                    weight=t2i_adapter_field.weight,
+                    begin_step_percent=t2i_adapter_field.begin_step_percent,
+                    end_step_percent=t2i_adapter_field.end_step_percent,
+                    resize_mode=t2i_adapter_field.resize_mode,
+                )
+            )
+
    def prep_ip_adapter_image_prompts(
        self,
        context: InvocationContext,
@@ -664,7 +738,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
        else:
            masked_latents = torch.where(mask < 0.5, 0.0, latents)

-        return 1 - mask, masked_latents, self.denoise_mask.gradient
+        return mask, masked_latents, self.denoise_mask.gradient

    @staticmethod
    def prepare_noise_and_latents(
@@ -707,12 +781,147 @@ class DenoiseLatentsInvocation(BaseInvocation):

        return seed, noise, latents

+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        if os.environ.get("USE_MODULAR_DENOISE", False):
+            return self._new_invoke(context)
+        else:
+            return self._old_invoke(context)
+
    @torch.no_grad()
    @SilenceWarnings()  # This quenches the NSFW nag from diffusers.
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
+    def _new_invoke(self, context: InvocationContext) -> LatentsOutput:
+        ext_manager = ExtensionsManager(is_canceled=context.util.is_canceled)
+
+        device = TorchDevice.choose_torch_device()
+        dtype = TorchDevice.choose_torch_dtype()
+
+        seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
+        _, _, latent_height, latent_width = latents.shape
+
+        conditioning_data = self.get_conditioning_data(
+            context=context,
+            positive_conditioning_field=self.positive_conditioning,
+            negative_conditioning_field=self.negative_conditioning,
+            cfg_scale=self.cfg_scale,
+            steps=self.steps,
+            latent_height=latent_height,
+            latent_width=latent_width,
+            device=device,
+            dtype=dtype,
+            # TODO: old backend, remove
+            cfg_rescale_multiplier=self.cfg_rescale_multiplier,
+        )
+
+        scheduler = get_scheduler(
+            context=context,
+            scheduler_info=self.unet.scheduler,
+            scheduler_name=self.scheduler,
+            seed=seed,
+        )
+
+        timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
+            scheduler,
+            seed=seed,
+            device=device,
+            steps=self.steps,
+            denoising_start=self.denoising_start,
+            denoising_end=self.denoising_end,
+        )
+
+        # get the unet's config so that we can pass the base to sd_step_callback()
+        unet_config = context.models.get_config(self.unet.unet.key)
+
+        ### preview
+        def step_callback(state: PipelineIntermediateState) -> None:
+            context.util.sd_step_callback(state, unet_config.base)
+
+        ext_manager.add_extension(PreviewExt(step_callback))
+
+        ### cfg rescale
+        if self.cfg_rescale_multiplier > 0:
+            ext_manager.add_extension(RescaleCFGExt(self.cfg_rescale_multiplier))
+
+        ### freeu
+        if self.unet.freeu_config:
+            ext_manager.add_extension(FreeUExt(self.unet.freeu_config))
+
+        ### seamless
+        if self.unet.seamless_axes:
+            ext_manager.add_extension(SeamlessExt(self.unet.seamless_axes))
+
+        ### inpaint
+        mask, masked_latents, is_gradient_mask = self.prep_inpaint_mask(context, latents)
+        # NOTE: We used to identify inpainting models by inspecting the shape of the loaded UNet model weights. Now we
+        # use the ModelVariantType config. During testing, there was a report of a user with models that had an
+        # incorrect ModelVariantType value. Re-installing the model fixed the issue. If this issue turns out to be
+        # prevalent, we will have to revisit how we initialize the inpainting extensions.
+        if unet_config.variant == ModelVariantType.Inpaint:
+            ext_manager.add_extension(InpaintModelExt(mask, masked_latents, is_gradient_mask))
+        elif mask is not None:
+            ext_manager.add_extension(InpaintExt(mask, is_gradient_mask))
+
+        # Initialize context for modular denoise
+        latents = latents.to(device=device, dtype=dtype)
+        if noise is not None:
+            noise = noise.to(device=device, dtype=dtype)
+        denoise_ctx = DenoiseContext(
+            inputs=DenoiseInputs(
+                orig_latents=latents,
+                timesteps=timesteps,
+                init_timestep=init_timestep,
+                noise=noise,
+                seed=seed,
+                scheduler_step_kwargs=scheduler_step_kwargs,
+                conditioning_data=conditioning_data,
+                attention_processor_cls=CustomAttnProcessor2_0,
+            ),
+            unet=None,
+            scheduler=scheduler,
+        )
+
+        # context for loading additional models
+        with ExitStack() as exit_stack:
+            # Later, this should be something like:
+            # for extension_field in self.extensions:
+            #    ext = extension_field.to_extension(exit_stack, context, ext_manager)
+            #    ext_manager.add_extension(ext)
+            self.parse_controlnet_field(exit_stack, context, self.control, ext_manager)
+            self.parse_t2i_adapter_field(exit_stack, context, self.t2i_adapter, ext_manager)
+
+            # ext: t2i/ip adapter
+            ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)
+
+            unet_info = context.models.load(self.unet.unet)
+            assert isinstance(unet_info.model, UNet2DConditionModel)
+            with (
+                unet_info.model_on_device() as (cached_weights, unet),
+                ModelPatcher.patch_unet_attention_processor(unet, denoise_ctx.inputs.attention_processor_cls),
+                # ext: controlnet
+                ext_manager.patch_extensions(denoise_ctx),
+                # ext: freeu, seamless, ip adapter, lora
+                ext_manager.patch_unet(unet, cached_weights),
+            ):
+                sd_backend = StableDiffusionBackend(unet, scheduler)
+                denoise_ctx.unet = unet
+                result_latents = sd_backend.latents_from_embeddings(denoise_ctx, ext_manager)
+
+        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+        result_latents = result_latents.detach().to("cpu")
+        TorchDevice.empty_cache()
+
+        name = context.tensors.save(tensor=result_latents)
+        return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
+
+    @torch.no_grad()
+    @SilenceWarnings()  # This quenches the NSFW nag from diffusers.
+    def _old_invoke(self, context: InvocationContext) -> LatentsOutput:
        seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)

        mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+        # At this point, the mask ranges from 0 (leave unchanged) to 1 (inpaint).
+        # We invert the mask here for compatibility with the old backend implementation.
+        if mask is not None:
+            mask = 1 - mask

        # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
        # below. Investigate whether this is appropriate.
@@ -757,7 +966,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
            ExitStack() as exit_stack,
            unet_info.model_on_device() as (model_state_dict, unet),
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
-            set_seamless(unet, self.unet.seamless_axes),  # FIXME
+            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
            ModelPatcher.apply_lora_unet(
                unet,
@@ -788,7 +997,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
                context=context,
                positive_conditioning_field=self.positive_conditioning,
                negative_conditioning_field=self.negative_conditioning,
-                unet=unet,
+                device=unet.device,
+                dtype=unet.dtype,
                latent_height=latent_height,
                latent_width=latent_width,
                cfg_scale=self.cfg_scale,
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -48,6 +48,7 @@ class UIType(str, Enum, metaclass=MetaEnum):
    ControlNetModel = "ControlNetModelField"
    IPAdapterModel = "IPAdapterModelField"
    T2IAdapterModel = "T2IAdapterModelField"
+    SpandrelImageToImageModel = "SpandrelImageToImageModelField"
    # endregion

    # region Misc Field Types
@@ -134,6 +135,7 @@ class FieldDescriptions:
    sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
    sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
    onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
+    spandrel_image_to_image_model = "Image-to-Image model"
    lora_weight = "The weight at which the LoRA is applied to each model"
    compel_prompt = "Prompt to be parsed by Compel to create a conditioning tensor"
    raw_prompt = "Raw prompt text (no parsing)"
@@ -240,6 +242,23 @@ class ConditioningField(BaseModel):
    )


+class BoundingBoxField(BaseModel):
+    """A bounding box primitive value."""
+
+    x_min: int = Field(ge=0, description="The minimum x-coordinate of the bounding box (inclusive).")
+    x_max: int = Field(ge=0, description="The maximum x-coordinate of the bounding box (exclusive).")
+    y_min: int = Field(ge=0, description="The minimum y-coordinate of the bounding box (inclusive).")
+    y_max: int = Field(ge=0, description="The maximum y-coordinate of the bounding box (exclusive).")
+
+    score: Optional[float] = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="The score associated with the bounding box. In the range [0, 1]. This value is typically set "
+        "when the bounding box was produced by a detector and has an associated confidence score.",
+    )
+
+
 class MetadataField(RootModel[dict[str, Any]]):
    """
    Pydantic model for metadata with custom root of type dict[str, Any].
--- a/invokeai/app/invocations/grounding_dino.py
+++ b/invokeai/app/invocations/grounding_dino.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+
+import torch
+from PIL import Image
+from transformers import pipeline
+from transformers.pipelines import ZeroShotObjectDetectionPipeline
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import BoundingBoxField, ImageField, InputField
+from invokeai.app.invocations.primitives import BoundingBoxCollectionOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.grounding_dino.detection_result import DetectionResult
+from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
+
+GROUNDING_DINO_MODEL_ID = "IDEA-Research/grounding-dino-tiny"
+
+
+@invocation(
+    "grounding_dino",
+    title="Grounding DINO (Text Prompt Object Detection)",
+    tags=["prompt", "object detection"],
+    category="image",
+    version="1.0.0",
+)
+class GroundingDinoInvocation(BaseInvocation):
+    """Runs a Grounding DINO model (https://arxiv.org/pdf/2303.05499). Performs zero-shot bounding-box object detection
+    from a text prompt.
+
+    Reference:
+    - https://huggingface.co/docs/transformers/v4.43.3/en/model_doc/grounding-dino#grounded-sam
+    - https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb
+    """
+
+    prompt: str = InputField(description="The prompt describing the object to segment.")
+    image: ImageField = InputField(description="The image to segment.")
+    detection_threshold: float = InputField(
+        description="The detection threshold for the Grounding DINO model. All detected bounding boxes with scores above this threshold will be returned.",
+        ge=0.0,
+        le=1.0,
+        default=0.3,
+    )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> BoundingBoxCollectionOutput:
+        # The model expects a 3-channel RGB image.
+        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        detections = self._detect(
+            context=context, image=image_pil, labels=[self.prompt], threshold=self.detection_threshold
+        )
+
+        # Convert detections to BoundingBoxCollectionOutput.
+        bounding_boxes: list[BoundingBoxField] = []
+        for detection in detections:
+            bounding_boxes.append(
+                BoundingBoxField(
+                    x_min=detection.box.xmin,
+                    x_max=detection.box.xmax,
+                    y_min=detection.box.ymin,
+                    y_max=detection.box.ymax,
+                    score=detection.score,
+                )
+            )
+        return BoundingBoxCollectionOutput(collection=bounding_boxes)
+
+    @staticmethod
+    def _load_grounding_dino(model_path: Path):
+        grounding_dino_pipeline = pipeline(
+            model=str(model_path),
+            task="zero-shot-object-detection",
+            local_files_only=True,
+            # TODO(ryand): Setting the torch_dtype here doesn't work. Investigate whether fp16 is supported by the
+            # model, and figure out how to make it work in the pipeline.
+            # torch_dtype=TorchDevice.choose_torch_dtype(),
+        )
+        assert isinstance(grounding_dino_pipeline, ZeroShotObjectDetectionPipeline)
+        return GroundingDinoPipeline(grounding_dino_pipeline)
+
+    def _detect(
+        self,
+        context: InvocationContext,
+        image: Image.Image,
+        labels: list[str],
+        threshold: float = 0.3,
+    ) -> list[DetectionResult]:
+        """Use Grounding DINO to detect bounding boxes for a set of labels in an image."""
+        # TODO(ryand): I copied this "."-handling logic from the transformers example code. Test it and see if it
+        # actually makes a difference.
+        labels = [label if label.endswith(".") else label + "." for label in labels]
+
+        with context.models.load_remote_model(
+            source=GROUNDING_DINO_MODEL_ID, loader=GroundingDinoInvocation._load_grounding_dino
+        ) as detector:
+            assert isinstance(detector, GroundingDinoPipeline)
+            return detector.detect(image=image, candidate_labels=labels, threshold=threshold)
--- a/invokeai/app/invocations/latents_to_image.py
+++ b/invokeai/app/invocations/latents_to_image.py
@@ -24,7 +24,7 @@ from invokeai.app.invocations.fields import (
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
 from invokeai.backend.util.devices import TorchDevice

@@ -59,7 +59,7 @@ class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):

        vae_info = context.models.load(self.vae.vae)
        assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
-        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
+        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
            assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
            latents = latents.to(vae.device)
            if self.fp32:
--- a/invokeai/app/invocations/mask.py
+++ b/invokeai/app/invocations/mask.py
@@ -1,9 +1,10 @@
 import numpy as np
 import torch
+from PIL import Image

 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, InvocationContext, invocation
-from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithMetadata
-from invokeai.app.invocations.primitives import MaskOutput
+from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput, MaskOutput


@invocation(
@@ -118,3 +119,28 @@ class ImageMaskToTensorInvocation(BaseInvocation, WithMetadata):
            height=mask.shape[1],
            width=mask.shape[2],
        )
+
+
+@invocation(
+    "tensor_mask_to_image",
+    title="Tensor Mask to Image",
+    tags=["mask"],
+    category="mask",
+    version="1.0.0",
+)
+class MaskTensorToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Convert a mask tensor to an image."""
+
+    mask: TensorField = InputField(description="The mask tensor to convert.")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        mask = context.tensors.load(self.mask.tensor_name)
+        # Ensure that the mask is binary.
+        if mask.dtype != torch.bool:
+            mask = mask > 0.5
+        mask_np = mask.float().cpu().detach().numpy() * 255
+        mask_np = mask_np.astype(np.uint8)
+
+        mask_pil = Image.fromarray(mask_np, mode="L")
+        image_dto = context.images.save(image=mask_pil)
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/primitives.py
+++ b/invokeai/app/invocations/primitives.py
@@ -7,6 +7,7 @@ import torch
 from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
 from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
 from invokeai.app.invocations.fields import (
+    BoundingBoxField,
    ColorField,
    ConditioningField,
    DenoiseMaskField,
@@ -469,3 +470,24 @@ class ConditioningCollectionInvocation(BaseInvocation):


 # endregion
+
+# region BoundingBox
+
+
+@invocation_output("bounding_box_output")
+class BoundingBoxOutput(BaseInvocationOutput):
+    """Base class for nodes that output a single bounding box"""
+
+    bounding_box: BoundingBoxField = OutputField(description="The output bounding box.")
+
+
+@invocation_output("bounding_box_collection_output")
+class BoundingBoxCollectionOutput(BaseInvocationOutput):
+    """Base class for nodes that output a collection of bounding boxes"""
+
+    collection: list[BoundingBoxField] = OutputField(
+        description="The output bounding boxes.",
+    )
+
+
+# endregion
--- a/invokeai/app/invocations/segment_anything_model.py
+++ b/invokeai/app/invocations/segment_anything_model.py
@@ -0,0 +1,155 @@
+from pathlib import Path
+from typing import Literal
+
+import numpy as np
+import torch
+from PIL import Image
+from transformers import AutoModelForMaskGeneration, AutoProcessor
+from transformers.models.sam import SamModel
+from transformers.models.sam.processing_sam import SamProcessor
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import BoundingBoxField, ImageField, InputField, TensorField
+from invokeai.app.invocations.primitives import MaskOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.segment_anything.mask_refinement import mask_to_polygon, polygon_to_mask
+from invokeai.backend.image_util.segment_anything.segment_anything_model import SegmentAnythingModel
+
+SEGMENT_ANYTHING_MODEL_ID = "facebook/sam-vit-base"
+
+
+@invocation(
+    "segment_anything_model",
+    title="Segment Anything Model",
+    tags=["prompt", "segmentation"],
+    category="segmentation",
+    version="1.0.0",
+)
+class SegmentAnythingModelInvocation(BaseInvocation):
+    """Runs a Segment Anything Model (https://arxiv.org/pdf/2304.02643).
+
+    Reference:
+    - https://huggingface.co/docs/transformers/v4.43.3/en/model_doc/grounding-dino#grounded-sam
+    - https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb
+    """
+
+    image: ImageField = InputField(description="The image to segment.")
+    bounding_boxes: list[BoundingBoxField] = InputField(description="The bounding boxes to prompt the SAM model with.")
+    apply_polygon_refinement: bool = InputField(
+        description="Whether to apply polygon refinement to the masks. This will smooth the edges of the masks slightly and ensure that each mask consists of a single closed polygon (before merging).",
+        default=True,
+    )
+    mask_filter: Literal["all", "largest", "highest_box_score"] = InputField(
+        description="The filtering to apply to the detected masks before merging them into a final output.",
+        default="all",
+    )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> MaskOutput:
+        """Segment the image with SAM and return the union of the selected masks as a single bool mask."""
+        # The models expect a 3-channel RGB image.
+        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
+        if not self.bounding_boxes:
+            # Nothing to prompt SAM with, so emit an all-False mask. PIL's size is (width, height), hence [::-1].
+            combined_mask = torch.zeros(image_pil.size[::-1], dtype=torch.bool)
+        else:
+            masks = self._segment(context=context, image=image_pil)
+            masks = self._filter_masks(masks=masks, bounding_boxes=self.bounding_boxes)
+
+            # masks contains bool values, so we merge them via max-reduce.
+            combined_mask, _ = torch.stack(masks).max(dim=0)
+
+        mask_tensor_name = context.tensors.save(combined_mask)
+        height, width = combined_mask.shape
+        return MaskOutput(mask=TensorField(tensor_name=mask_tensor_name), width=width, height=height)
+
+    @staticmethod
+    def _load_sam_model(model_path: Path):
+        """Loader callback: build a SegmentAnythingModel from the files at model_path (local files only)."""
+        sam_model = AutoModelForMaskGeneration.from_pretrained(
+            model_path,
+            local_files_only=True,
+            # TODO(ryand): Setting the torch_dtype here doesn't work. Investigate whether fp16 is supported by the
+            # model, and figure out how to make it work in the pipeline.
+            # torch_dtype=TorchDevice.choose_torch_dtype(),
+        )
+        assert isinstance(sam_model, SamModel)
+        sam_processor = AutoProcessor.from_pretrained(model_path, local_files_only=True)
+        assert isinstance(sam_processor, SamProcessor)
+        return SegmentAnythingModel(sam_model=sam_model, sam_processor=sam_processor)
+
+    def _segment(self, context: InvocationContext, image: Image.Image) -> list[torch.Tensor]:
+        """Use Segment Anything (SAM) to generate masks given an image + a set of bounding boxes.
+
+        The result is expected to hold one [height, width] bool mask per bounding box, index-aligned with
+        self.bounding_boxes (_filter_masks relies on this). Polygon refinement is applied if enabled.
+        """
+        # Convert the bounding boxes to the SAM input format.
+        sam_bounding_boxes = [[bb.x_min, bb.y_min, bb.x_max, bb.y_max] for bb in self.bounding_boxes]
+
+        with (
+            context.models.load_remote_model(
+                source=SEGMENT_ANYTHING_MODEL_ID, loader=SegmentAnythingModelInvocation._load_sam_model
+            ) as sam_pipeline,
+        ):
+            assert isinstance(sam_pipeline, SegmentAnythingModel)
+            masks = sam_pipeline.segment(image=image, bounding_boxes=sam_bounding_boxes)
+
+        masks = self._process_masks(masks)
+        if self.apply_polygon_refinement:
+            masks = self._apply_polygon_refinement(masks)
+
+        return masks
+
+    def _process_masks(self, masks: torch.Tensor) -> list[torch.Tensor]:
+        """Convert the tensor output from the Segment Anything model from a tensor of shape
+        [num_masks, channels, height, width] to a list of tensors of shape [height, width].
+        """
+        assert masks.dtype == torch.bool
+        # [num_masks, channels, height, width] -> [num_masks, height, width]
+        masks, _ = masks.max(dim=1)
+        # Split the first dimension into a list of masks.
+        return list(masks.cpu().unbind(dim=0))
+
+    def _apply_polygon_refinement(self, masks: list[torch.Tensor]) -> list[torch.Tensor]:
+        """Apply polygon refinement to the masks.
+
+        Convert each mask to a polygon, then back to a mask. This has the following effect:
+        - Smooth the edges of the mask slightly.
+        - Ensure that each mask consists of a single closed polygon
+            - Removes small mask pieces.
+            - Removes holes from the mask.
+        """
+        # Convert tensor masks to np masks.
+        np_masks = [mask.cpu().numpy().astype(np.uint8) for mask in masks]
+
+        # Apply polygon refinement.
+        for idx, mask in enumerate(np_masks):
+            shape = mask.shape
+            assert len(shape) == 2  # Assert length to satisfy type checker.
+            polygon = mask_to_polygon(mask)
+            mask = polygon_to_mask(polygon, shape)
+            np_masks[idx] = mask
+
+        # Convert np masks back to tensor masks.
+        masks = [torch.tensor(mask, dtype=torch.bool) for mask in np_masks]
+
+        return masks
+
+    def _filter_masks(self, masks: list[torch.Tensor], bounding_boxes: list[BoundingBoxField]) -> list[torch.Tensor]:
+        """Filter the detected masks based on the specified mask filter."""
+        assert len(masks) == len(bounding_boxes)
+
+        if self.mask_filter == "all":
+            return masks
+        elif self.mask_filter == "largest":
+            # Find the largest mask.
+            return [max(masks, key=lambda x: float(x.sum()))]
+        elif self.mask_filter == "highest_box_score":
+            # Find the index of the bounding box with the highest score. A missing (None) score ranks as -1.0,
+            # i.e. below the expected score range of [0.0, 1.0]. The check must be `is None` rather than a
+            # truthiness test, otherwise a legitimate score of 0.0 would be treated as missing too.
+            scores = [-1.0 if bb.score is None else bb.score for bb in bounding_boxes]
+            max_score_idx = max(range(len(scores)), key=scores.__getitem__)
+            return [masks[max_score_idx]]
+        else:
+            raise ValueError(f"Invalid mask filter: {self.mask_filter}")
--- a/invokeai/app/invocations/spandrel_image_to_image.py
+++ b/invokeai/app/invocations/spandrel_image_to_image.py
@@ -0,0 +1,253 @@
+from typing import Callable
+
+import numpy as np
+import torch
+from PIL import Image
+from tqdm import tqdm
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    InputField,
+    UIType,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.model import ModelIdentifierField
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.session_processor.session_processor_common import CanceledException
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
+from invokeai.backend.tiles.tiles import calc_tiles_min_overlap
+from invokeai.backend.tiles.utils import TBLR, Tile
+
+
+@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.3.0")
+class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel)."""
+
+    image: ImageField = InputField(description="The input image")
+    image_to_image_model: ModelIdentifierField = InputField(
+        title="Image-to-Image Model",
+        description=FieldDescriptions.spandrel_image_to_image_model,
+        ui_type=UIType.SpandrelImageToImageModel,
+    )
+    tile_size: int = InputField(
+        default=512, description="The tile size for tiled image-to-image. Set to 0 to disable tiling."
+    )
+
+    @classmethod
+    def scale_tile(cls, tile: Tile, scale: int) -> Tile:
+        """Return a copy of `tile` with every coordinate and overlap value multiplied by `scale`.
+
+        This maps a tile of the input image onto the matching region of the upscaled output.
+        """
+
+        def _scaled(box: TBLR) -> TBLR:
+            return TBLR(
+                top=box.top * scale,
+                bottom=box.bottom * scale,
+                left=box.left * scale,
+                right=box.right * scale,
+            )
+
+        return Tile(coords=_scaled(tile.coords), overlap=_scaled(tile.overlap))
+
+    @classmethod
+    def upscale_image(
+        cls,
+        image: Image.Image,
+        tile_size: int,
+        spandrel_model: SpandrelImageToImageModel,
+        is_canceled: Callable[[], bool],
+    ) -> Image.Image:
+        """Run `spandrel_model` over `image` tile by tile and stitch the results into a single PIL image.
+
+        Args:
+            image: The input image.
+            tile_size: Side length of the square tiles. A value of 0 or less disables tiling, i.e. the whole
+                image is processed as one tile.
+            spandrel_model: The model to run. Its `scale` determines the size of the output.
+            is_canceled: Polled before each tile is processed.
+
+        Raises:
+            CanceledException: If `is_canceled()` returns True.
+        """
+        if tile_size > 0:
+            # Cover the image with square tiles of side tile_size, requesting a minimum overlap of 20.
+            tiles = calc_tiles_min_overlap(
+                image_height=image.height,
+                image_width=image.width,
+                tile_height=tile_size,
+                tile_width=tile_size,
+                min_overlap=20,
+            )
+        else:
+            # No tiling. Generate a single tile that covers the entire image.
+            whole_image = TBLR(top=0, bottom=image.height, left=0, right=image.width)
+            no_overlap = TBLR(top=0, bottom=0, left=0, right=0)
+            tiles = [Tile(coords=whole_image, overlap=no_overlap)]
+
+        # During tile processing we iterate top-to-bottom, and left-to-right within each row. The sort is
+        # stable, so tiles with identical (top, left) keep their relative order.
+        tiles = sorted(tiles, key=lambda t: (t.coords.top, t.coords.left))
+
+        # Prepare input image for inference.
+        image_tensor = SpandrelImageToImageModel.pil_to_tensor(image)
+
+        # The same tiles in output coordinates, used for re-assembling the final image.
+        scale = spandrel_model.scale
+        scaled_tiles = [cls.scale_tile(t, scale=scale) for t in tiles]
+
+        # The output is accumulated on the CPU as uint8 in (H, W, C) layout.
+        cpu = torch.device("cpu")
+        _, channels, height, width = image_tensor.shape
+        output_tensor = torch.zeros((height * scale, width * scale, channels), dtype=torch.uint8, device=cpu)
+
+        image_tensor = image_tensor.to(device=spandrel_model.device, dtype=spandrel_model.dtype)
+
+        # Run the model on each tile.
+        for tile, scaled_tile in tqdm(list(zip(tiles, scaled_tiles, strict=True)), desc="Upscaling Tiles"):
+            # Exit early if the invocation has been canceled.
+            if is_canceled():
+                raise CanceledException
+
+            # Cut the current tile out of the input tensor (the spatial dims are the last two).
+            src = tile.coords
+            input_tile = image_tensor[:, :, src.top : src.bottom, src.left : src.right]
+            input_tile = input_tile.to(device=spandrel_model.device, dtype=spandrel_model.dtype)
+
+            # Run the model on the tile.
+            output_tile = spandrel_model.run(input_tile)
+
+            # Convert the output tile into the output tensor's format:
+            # (N, C, H, W) -> (C, H, W) -> (H, W, C), values clamped to [0, 1] and rescaled to uint8.
+            output_tile = output_tile.squeeze(0).permute(1, 2, 0)
+            output_tile = output_tile.clamp(0, 1)
+            output_tile = (output_tile * 255).to(dtype=torch.uint8, device=cpu)
+
+            # Merge the output tile into the output tensor.
+            # We only keep half of the overlap on the top and left side of the tile. We do this in case there are
+            # edge artifacts. We don't bother with any 'blending' in the current implementation - for most upscalers
+            # it seems unnecessary, but we may find a need in the future.
+            dst = scaled_tile.coords
+            top_overlap = scaled_tile.overlap.top // 2
+            left_overlap = scaled_tile.overlap.left // 2
+            rows = slice(dst.top + top_overlap, dst.bottom)
+            cols = slice(dst.left + left_overlap, dst.right)
+            output_tensor[rows, cols, :] = output_tile[top_overlap:, left_overlap:, :]
+
+        # Convert the output tensor to a PIL image.
+        np_image = output_tensor.detach().numpy().astype(np.uint8)
+        return Image.fromarray(np_image)
+
+    @torch.inference_mode()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        """Load the input image and the selected model, run the model once, and save the resulting image."""
+        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
+        # revisit this.
+        source_image = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        # Load the model.
+        model_info = context.models.load(self.image_to_image_model)
+
+        # Entering the handle yields the model, which is only used inside the `with` block.
+        with model_info as spandrel_model:
+            assert isinstance(spandrel_model, SpandrelImageToImageModel)
+            result = self.upscale_image(source_image, self.tile_size, spandrel_model, context.util.is_canceled)
+
+        # Persist the result and wrap the saved image in the node's output type.
+        image_dto = context.images.save(image=result)
+        return ImageOutput.build(image_dto)
+
+
+@invocation(
+    "spandrel_image_to_image_autoscale",
+    title="Image-to-Image (Autoscale)",
+    tags=["upscale"],
+    category="upscale",
+    version="1.0.0",
+)
+class SpandrelImageToImageAutoscaleInvocation(SpandrelImageToImageInvocation):
+    """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel) until the target scale is reached."""
+
+    scale: float = InputField(
+        default=4.0,
+        gt=0.0,
+        le=16.0,
+        description="The final scale of the output image. If the model does not upscale the image, this will be ignored.",
+    )
+    fit_to_multiple_of_8: bool = InputField(
+        default=False,
+        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
+    )
+
+    @torch.inference_mode()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        """Upscale repeatedly until the requested scale is reached, then resize to the exact target size."""
+        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
+        # revisit this.
+        source_image = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        # Load the model.
+        model_info = context.models.load(self.image_to_image_model)
+
+        # The size we are aiming for, as determined by the provided scale. Both values may still be overridden
+        # below: if the model turns out not to upscale, or if a multiple of 8 was requested.
+        target_width = int(source_image.width * self.scale)
+        target_height = int(source_image.height * self.scale)
+
+        with model_info as spandrel_model:
+            assert isinstance(spandrel_model, SpandrelImageToImageModel)
+
+            def run_model(img: Image.Image) -> Image.Image:
+                # One full pass of the model over `img`, using this node's tile size and cancellation hook.
+                return self.upscale_image(img, self.tile_size, spandrel_model, context.util.is_canceled)
+
+            # First pass of upscaling.
+            pil_image = run_model(source_image)
+
+            # Some models don't upscale the image, but we have no way to know this in advance. A model only counts
+            # as an upscale model if its output is larger than its input in both dimensions.
+            grew_wider = pil_image.width > source_image.width
+            grew_taller = pil_image.height > source_image.height
+
+            if grew_wider and grew_taller:
+                # Keep upscaling until the target size is reached. `total_passes` counts the first pass as well.
+                #
+                # The cap is a sanity check to prevent excessive or infinite loops. All known upscaling models are
+                # at least 2x. Our max scale is 16x, so with a 2x model, we should never exceed 16x == 2^4 -> 4
+                # passes. We allow one extra pass "just in case" and bail at 5. In practice, we should never reach
+                # this limit.
+                for total_passes in range(2, 6):
+                    if pil_image.width >= target_width and pil_image.height >= target_height:
+                        break
+                    pil_image = run_model(pil_image)
+                    if total_passes == 5:
+                        context.logger.warning(
+                            "Upscale loop reached maximum iteration count of 5, stopping upscaling early."
+                        )
+            else:
+                # This model doesn't upscale the image, so the scale parameter is ignored and the output size
+                # becomes the size of the processed image.
+                target_width, target_height = pil_image.size
+
+                # Warn the user if they requested a scale greater than 1.
+                if self.scale > 1:
+                    context.logger.warning(
+                        "Model does not increase the size of the image, but a greater scale than 1 was requested. Image will not be scaled."
+                    )
+
+        # We may need to resize the image to a multiple of 8. Rounding down (by dropping the remainder) ensures we
+        # don't scale the image up in the final resize.
+        if self.fit_to_multiple_of_8:
+            target_width -= target_width % 8
+            target_height -= target_height % 8
+
+        # Final resize. Per PIL documentation, Lanczos provides the best quality for both upscale and downscale.
+        # See: https://pillow.readthedocs.io/en/stable/handbook/concepts.html#filters-comparison-table
+        pil_image = pil_image.resize((target_width, target_height), resample=Image.Resampling.LANCZOS)
+
+        image_dto = context.images.save(image=pil_image)
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -175,6 +175,10 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
        _, _, latent_height, latent_width = latents.shape

        # Calculate the tile locations to cover the latent-space image.
+        # TODO(ryand): In the future, we may want to revisit the tile overlap strategy. Things to consider:
+        # - How much overlap 'context' to provide for each denoising step.
+        # - How much overlap to use during merging/blending.
+        # - Should we 'jitter' the tile locations in each step so that the seams are in different places?
        tiles = calc_tiles_min_overlap(
            image_height=latent_height,
            image_width=latent_width,
@@ -218,7 +222,8 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                context=context,
                positive_conditioning_field=self.positive_conditioning,
                negative_conditioning_field=self.negative_conditioning,
-                unet=unet,
+                device=unet.device,
+                dtype=unet.dtype,
                latent_height=latent_tile_height,
                latent_width=latent_tile_width,
                cfg_scale=self.cfg_scale,
--- a/invokeai/app/services/model_install/model_install_base.py
+++ b/invokeai/app/services/model_install/model_install_base.py
@@ -3,7 +3,7 @@

 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import List, Optional, Union

 from pydantic.networks import AnyHttpUrl

@@ -12,7 +12,7 @@ from invokeai.app.services.download import DownloadQueueServiceBase
 from invokeai.app.services.events.events_base import EventServiceBase
 from invokeai.app.services.invoker import Invoker
 from invokeai.app.services.model_install.model_install_common import ModelInstallJob, ModelSource
-from invokeai.app.services.model_records import ModelRecordServiceBase
+from invokeai.app.services.model_records import ModelRecordChanges, ModelRecordServiceBase
 from invokeai.backend.model_manager import AnyModelConfig


@@ -64,7 +64,7 @@ class ModelInstallServiceBase(ABC):
    def register_path(
        self,
        model_path: Union[Path, str],
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
    ) -> str:
        """
        Probe and register the model at model_path.
@@ -72,7 +72,7 @@ class ModelInstallServiceBase(ABC):
        This keeps the model in its current location.

        :param model_path: Filesystem Path to the model.
-        :param config: Dict of attributes that will override autoassigned values.
+        :param config: ModelRecordChanges object that will override autoassigned model record values.
        :returns id: The string ID of the registered model.
        """

@@ -92,7 +92,7 @@ class ModelInstallServiceBase(ABC):
    def install_path(
        self,
        model_path: Union[Path, str],
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
    ) -> str:
        """
        Probe, register and install the model in the models directory.
@@ -101,7 +101,7 @@ class ModelInstallServiceBase(ABC):
        the models directory handled by InvokeAI.

        :param model_path: Filesystem Path to the model.
-        :param config: Dict of attributes that will override autoassigned values.
+        :param config: ModelRecordChanges object that will override autoassigned model record values.
        :returns id: The string ID of the registered model.
        """

@@ -109,14 +109,14 @@ class ModelInstallServiceBase(ABC):
    def heuristic_import(
        self,
        source: str,
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
        access_token: Optional[str] = None,
        inplace: Optional[bool] = False,
    ) -> ModelInstallJob:
        r"""Install the indicated model using heuristics to interpret user intentions.

        :param source: String source
-        :param config: Optional dict. Any fields in this dict
+        :param config: Optional ModelRecordChanges object. Any fields in this object
         will override corresponding autoassigned probe fields in the
         model's config record as described in `import_model()`.
        :param access_token: Optional access token for remote sources.
@@ -147,7 +147,7 @@ class ModelInstallServiceBase(ABC):
    def import_model(
        self,
        source: ModelSource,
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
    ) -> ModelInstallJob:
        """Install the indicated model.

--- a/invokeai/app/services/model_install/model_install_common.py
+++ b/invokeai/app/services/model_install/model_install_common.py
@@ -2,13 +2,14 @@ import re
 import traceback
 from enum import Enum
 from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Set, Union
+from typing import Literal, Optional, Set, Union

 from pydantic import BaseModel, Field, PrivateAttr, field_validator
 from pydantic.networks import AnyHttpUrl
 from typing_extensions import Annotated

 from invokeai.app.services.download import DownloadJob, MultiFileDownloadJob
+from invokeai.app.services.model_records import ModelRecordChanges
 from invokeai.backend.model_manager import AnyModelConfig, ModelRepoVariant
 from invokeai.backend.model_manager.config import ModelSourceType
 from invokeai.backend.model_manager.metadata import AnyModelRepoMetadata
@@ -133,8 +134,9 @@ class ModelInstallJob(BaseModel):
    id: int = Field(description="Unique ID for this job")
    status: InstallStatus = Field(default=InstallStatus.WAITING, description="Current status of install process")
    error_reason: Optional[str] = Field(default=None, description="Information about why the job failed")
-    config_in: Dict[str, Any] = Field(
-        default_factory=dict, description="Configuration information (e.g. 'description') to apply to model."
+    config_in: ModelRecordChanges = Field(
+        default_factory=ModelRecordChanges,
+        description="Configuration information (e.g. 'description') to apply to model.",
    )
    config_out: Optional[AnyModelConfig] = Field(
        default=None, description="After successful installation, this will hold the configuration object."
--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@@ -163,26 +163,27 @@ class ModelInstallService(ModelInstallServiceBase):
    def register_path(
        self,
        model_path: Union[Path, str],
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
    ) -> str:  # noqa D102
        model_path = Path(model_path)
-        config = config or {}
-        if not config.get("source"):
-            config["source"] = model_path.resolve().as_posix()
-        config["source_type"] = ModelSourceType.Path
+        config = config or ModelRecordChanges()
+        if not config.source:
+            config.source = model_path.resolve().as_posix()
+        config.source_type = ModelSourceType.Path
        return self._register(model_path, config)

    def install_path(
        self,
        model_path: Union[Path, str],
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
    ) -> str:  # noqa D102
        model_path = Path(model_path)
-        config = config or {}
+        config = config or ModelRecordChanges()
+        info: AnyModelConfig = ModelProbe.probe(
+            Path(model_path), config.model_dump(), hash_algo=self._app_config.hashing_algorithm
+        )  # type: ignore

-        info: AnyModelConfig = ModelProbe.probe(Path(model_path), config, hash_algo=self._app_config.hashing_algorithm)
-
-        if preferred_name := config.get("name"):
+        if preferred_name := config.name:
            preferred_name = Path(preferred_name).with_suffix(model_path.suffix)

        dest_path = (
@@ -204,7 +205,7 @@ class ModelInstallService(ModelInstallServiceBase):
    def heuristic_import(
        self,
        source: str,
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
        access_token: Optional[str] = None,
        inplace: Optional[bool] = False,
    ) -> ModelInstallJob:
@@ -216,7 +217,7 @@ class ModelInstallService(ModelInstallServiceBase):
            source_obj.access_token = access_token
        return self.import_model(source_obj, config)

-    def import_model(self, source: ModelSource, config: Optional[Dict[str, Any]] = None) -> ModelInstallJob:  # noqa D102
+    def import_model(self, source: ModelSource, config: Optional[ModelRecordChanges] = None) -> ModelInstallJob:  # noqa D102
        similar_jobs = [x for x in self.list_jobs() if x.source == source and not x.in_terminal_state]
        if similar_jobs:
            self._logger.warning(f"There is already an active install job for {source}. Not enqueuing.")
@@ -318,16 +319,17 @@ class ModelInstallService(ModelInstallServiceBase):
                        model_path = self._app_config.models_path / model_path
                    model_path = model_path.resolve()

-                    config: dict[str, Any] = {}
-                    config["name"] = model_name
-                    config["description"] = stanza.get("description")
+                    config = ModelRecordChanges(
+                        name=model_name,
+                        description=stanza.get("description"),
+                    )
                    legacy_config_path = stanza.get("config")
                    if legacy_config_path:
                        # In v3, these paths were relative to the root. Migrate them to be relative to the legacy_conf_dir.
                        legacy_config_path = self._app_config.root_path / legacy_config_path
                        if legacy_config_path.is_relative_to(self._app_config.legacy_conf_path):
                            legacy_config_path = legacy_config_path.relative_to(self._app_config.legacy_conf_path)
-                        config["config_path"] = str(legacy_config_path)
+                        config.config_path = str(legacy_config_path)
                    try:
                        id = self.register_path(model_path=model_path, config=config)
                        self._logger.info(f"Migrated {model_name} with id {id}")
@@ -500,11 +502,11 @@ class ModelInstallService(ModelInstallServiceBase):
        job.total_bytes = self._stat_size(job.local_path)
        job.bytes = job.total_bytes
        self._signal_job_running(job)
-        job.config_in["source"] = str(job.source)
-        job.config_in["source_type"] = MODEL_SOURCE_TO_TYPE_MAP[job.source.__class__]
+        job.config_in.source = str(job.source)
+        job.config_in.source_type = MODEL_SOURCE_TO_TYPE_MAP[job.source.__class__]
        # enter the metadata, if there is any
        if isinstance(job.source_metadata, (HuggingFaceMetadata)):
-            job.config_in["source_api_response"] = job.source_metadata.api_response
+            job.config_in.source_api_response = job.source_metadata.api_response

        if job.inplace:
            key = self.register_path(job.local_path, job.config_in)
@@ -639,11 +641,11 @@ class ModelInstallService(ModelInstallServiceBase):
        return new_path

    def _register(
-        self, model_path: Path, config: Optional[Dict[str, Any]] = None, info: Optional[AnyModelConfig] = None
+        self, model_path: Path, config: Optional[ModelRecordChanges] = None, info: Optional[AnyModelConfig] = None
    ) -> str:
-        config = config or {}
+        config = config or ModelRecordChanges()

-        info = info or ModelProbe.probe(model_path, config, hash_algo=self._app_config.hashing_algorithm)
+        info = info or ModelProbe.probe(model_path, config.model_dump(), hash_algo=self._app_config.hashing_algorithm)  # type: ignore

        model_path = model_path.resolve()

@@ -674,11 +676,13 @@ class ModelInstallService(ModelInstallServiceBase):
        precision = TorchDevice.choose_torch_dtype()
        return ModelRepoVariant.FP16 if precision == torch.float16 else None

-    def _import_local_model(self, source: LocalModelSource, config: Optional[Dict[str, Any]]) -> ModelInstallJob:
+    def _import_local_model(
+        self, source: LocalModelSource, config: Optional[ModelRecordChanges] = None
+    ) -> ModelInstallJob:
        return ModelInstallJob(
            id=self._next_id(),
            source=source,
-            config_in=config or {},
+            config_in=config or ModelRecordChanges(),
            local_path=Path(source.path),
            inplace=source.inplace or False,
        )
@@ -686,7 +690,7 @@ class ModelInstallService(ModelInstallServiceBase):
    def _import_from_hf(
        self,
        source: HFModelSource,
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[ModelRecordChanges] = None,
    ) -> ModelInstallJob:
        # Add user's cached access token to HuggingFace requests
        if source.access_token is None:
@@ -702,7 +706,7 @@ class ModelInstallService(ModelInstallServiceBase):
    def _import_from_url(
        self,
        source: URLModelSource,
-        config: Optional[Dict[str, Any]],
+        config: Optional[ModelRecordChanges] = None,
    ) -> ModelInstallJob:
        remote_files, metadata = self._remote_files_from_source(source)
        return self._import_remote_model(
@@ -717,7 +721,7 @@ class ModelInstallService(ModelInstallServiceBase):
        source: HFModelSource | URLModelSource,
        remote_files: List[RemoteModelFile],
        metadata: Optional[AnyModelRepoMetadata],
-        config: Optional[Dict[str, Any]],
+        config: Optional[ModelRecordChanges],
    ) -> ModelInstallJob:
        if len(remote_files) == 0:
            raise ValueError(f"{source}: No downloadable files found")
@@ -730,7 +734,7 @@ class ModelInstallService(ModelInstallServiceBase):
        install_job = ModelInstallJob(
            id=self._next_id(),
            source=source,
-            config_in=config or {},
+            config_in=config or ModelRecordChanges(),
            source_metadata=metadata,
            local_path=destdir,  # local path may change once the download has started due to content-disposition handling
            bytes=0,
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -18,6 +18,7 @@ from invokeai.backend.model_manager.config import (
    ControlAdapterDefaultSettings,
    MainModelDefaultSettings,
    ModelFormat,
+    ModelSourceType,
    ModelType,
    ModelVariantType,
    SchedulerPredictionType,
@@ -66,10 +67,16 @@ class ModelRecordChanges(BaseModelExcludeNull):
    """A set of changes to apply to a model."""

    # Changes applicable to all models
+    source: Optional[str] = Field(description="original source of the model", default=None)
+    source_type: Optional[ModelSourceType] = Field(description="type of model source", default=None)
+    source_api_response: Optional[str] = Field(description="metadata from remote source", default=None)
    name: Optional[str] = Field(description="Name of the model.", default=None)
    path: Optional[str] = Field(description="Path to the model.", default=None)
    description: Optional[str] = Field(description="Model description", default=None)
    base: Optional[BaseModelType] = Field(description="The base model.", default=None)
+    type: Optional[ModelType] = Field(description="Type of model", default=None)
+    key: Optional[str] = Field(description="Database ID for this model", default=None)
+    hash: Optional[str] = Field(description="hash of model file", default=None)
    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
    default_settings: Optional[MainModelDefaultSettings | ControlAdapterDefaultSettings] = Field(
        description="Default settings for this model", default=None
--- a/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
+++ b/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
--- a/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
+++ b/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
--- a/invokeai/backend/image_util/grounding_dino/__init__.py
+++ b/invokeai/backend/image_util/grounding_dino/__init__.py
--- a/invokeai/backend/image_util/grounding_dino/detection_result.py
+++ b/invokeai/backend/image_util/grounding_dino/detection_result.py
@@ -0,0 +1,22 @@
+from pydantic import BaseModel, ConfigDict
+
+
+class BoundingBox(BaseModel):
+    """Bounding box helper class."""
+
+    xmin: int
+    ymin: int
+    xmax: int
+    ymax: int
+
+
+class DetectionResult(BaseModel):
+    """Detection result from Grounding DINO."""
+
+    score: float
+    label: str
+    box: BoundingBox
+    model_config = ConfigDict(
+        # Allows arbitrary field types. NOTE(review): this model declares no mask/numpy field; confirm it is needed.
+        arbitrary_types_allowed=True
+    )
--- a/invokeai/backend/image_util/grounding_dino/grounding_dino_pipeline.py
+++ b/invokeai/backend/image_util/grounding_dino/grounding_dino_pipeline.py
@@ -0,0 +1,36 @@
+from typing import Optional
+
+import torch
+from PIL import Image
+from transformers.pipelines import ZeroShotObjectDetectionPipeline
+
+from invokeai.backend.image_util.grounding_dino.detection_result import DetectionResult
+from invokeai.backend.raw_model import RawModel
+
+
+class GroundingDinoPipeline(RawModel):
+    """A wrapper class for a ZeroShotObjectDetectionPipeline that makes it compatible with the model manager's memory
+    management system.
+    """
+
+    def __init__(self, pipeline: ZeroShotObjectDetectionPipeline):
+        self._pipeline = pipeline
+
+    def detect(self, image: Image.Image, candidate_labels: list[str], threshold: float = 0.1) -> list[DetectionResult]:
+        results = self._pipeline(image=image, candidate_labels=candidate_labels, threshold=threshold)
+        results = [DetectionResult.model_validate(result) for result in results]
+        return results
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
+        # HACK(ryand): The GroundingDinoPipeline does not work on MPS devices. We only allow it to be moved to CPU or
+        # CUDA.
+        if device is not None and device.type not in {"cpu", "cuda"}:
+            device = None
+        self._pipeline.model.to(device=device, dtype=dtype)
+        self._pipeline.device = self._pipeline.model.device
+
+    def calc_size(self) -> int:
+        # HACK(ryand): Import inside the method to work around a circular import with model_util.
+        from invokeai.backend.model_manager.load.model_util import calc_module_size
+
+        return calc_module_size(self._pipeline.model)
--- a/invokeai/backend/image_util/segment_anything/__init__.py
+++ b/invokeai/backend/image_util/segment_anything/__init__.py
--- a/invokeai/backend/image_util/segment_anything/mask_refinement.py
+++ b/invokeai/backend/image_util/segment_anything/mask_refinement.py
@@ -0,0 +1,50 @@
+# This file contains utilities for Grounded-SAM mask refinement based on:
+# https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb
+
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+
+
+def mask_to_polygon(mask: npt.NDArray[np.uint8]) -> list[tuple[int, int]]:
+    """Convert a binary mask to a polygon.
+
+    Returns:
+        list[list[int]]: List of (x, y) coordinates representing the vertices of the polygon.
+    """
+    # Find contours in the binary mask.
+    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    # Find the contour with the largest area.
+    largest_contour = max(contours, key=cv2.contourArea)
+
+    # Extract the vertices of the contour.
+    polygon = largest_contour.reshape(-1, 2).tolist()
+
+    return polygon
+
+
+def polygon_to_mask(
+    polygon: list[tuple[int, int]], image_shape: tuple[int, int], fill_value: int = 1
+) -> npt.NDArray[np.uint8]:
+    """Convert a polygon to a segmentation mask.
+
+    Args:
+        polygon (list): List of (x, y) coordinates representing the vertices of the polygon.
+        image_shape (tuple): Shape of the image (height, width) for the mask.
+        fill_value (int): Value to fill the polygon with.
+
+    Returns:
+        np.ndarray: Segmentation mask with the polygon filled (with value `fill_value`).
+    """
+    # Create an empty mask.
+    mask = np.zeros(image_shape, dtype=np.uint8)
+
+    # Convert polygon to an array of points.
+    pts = np.array(polygon, dtype=np.int32)
+
+    # Fill the polygon interior with `fill_value`.
+    cv2.fillPoly(mask, [pts], color=(fill_value,))
+
+    return mask
--- a/invokeai/backend/image_util/segment_anything/segment_anything_model.py
+++ b/invokeai/backend/image_util/segment_anything/segment_anything_model.py
@@ -0,0 +1,53 @@
+from typing import Optional
+
+import torch
+from PIL import Image
+from transformers.models.sam import SamModel
+from transformers.models.sam.processing_sam import SamProcessor
+
+from invokeai.backend.raw_model import RawModel
+
+
+class SegmentAnythingModel(RawModel):
+    """A wrapper class for the transformers SAM model and processor that makes it compatible with the model manager."""
+
+    def __init__(self, sam_model: SamModel, sam_processor: SamProcessor):
+        self._sam_model = sam_model
+        self._sam_processor = sam_processor
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
+        # HACK(ryand): The SAM pipeline does not work on MPS devices. We only allow it to be moved to CPU or CUDA.
+        if device is not None and device.type not in {"cpu", "cuda"}:
+            device = None
+        self._sam_model.to(device=device, dtype=dtype)
+
+    def calc_size(self) -> int:
+        # HACK(ryand): Import inside the method to work around a circular import with model_util.
+        from invokeai.backend.model_manager.load.model_util import calc_module_size
+
+        return calc_module_size(self._sam_model)
+
+    def segment(self, image: Image.Image, bounding_boxes: list[list[int]]) -> torch.Tensor:
+        """Run the SAM model.
+
+        Args:
+            image (Image.Image): The image to segment.
+            bounding_boxes (list[list[int]]): The bounding box prompts. Each bounding box is in the format
+                [xmin, ymin, xmax, ymax].
+
+        Returns:
+            torch.Tensor: The segmentation masks. dtype: torch.bool. shape: [num_masks, channels, height, width].
+        """
+        # Add batch dimension of 1 to the bounding boxes.
+        boxes = [bounding_boxes]
+        inputs = self._sam_processor(images=image, input_boxes=boxes, return_tensors="pt").to(self._sam_model.device)
+        outputs = self._sam_model(**inputs)
+        masks = self._sam_processor.post_process_masks(
+            masks=outputs.pred_masks,
+            original_sizes=inputs.original_sizes,
+            reshaped_input_sizes=inputs.reshaped_input_sizes,
+        )
+
+        # There should be only one batch.
+        assert len(masks) == 1
+        return masks[0]
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -124,16 +124,14 @@ class IPAdapter(RawModel):
            self.device, dtype=self.dtype
        )

-    def to(
-        self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, non_blocking: bool = False
-    ):
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        if device is not None:
            self.device = device
        if dtype is not None:
            self.dtype = dtype

-        self._image_proj_model.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
-        self.attn_weights.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
+        self._image_proj_model.to(device=self.device, dtype=self.dtype)
+        self.attn_weights.to(device=self.device, dtype=self.dtype)

    def calc_size(self) -> int:
        # HACK(ryand): Fix this issue with circular imports.
--- a/invokeai/backend/lora.py
+++ b/invokeai/backend/lora.py
@@ -11,7 +11,6 @@ from typing_extensions import Self

 from invokeai.backend.model_manager import BaseModelType
 from invokeai.backend.raw_model import RawModel
-from invokeai.backend.util.devices import TorchDevice


 class LoRALayerBase:
@@ -57,14 +56,9 @@ class LoRALayerBase:
                model_size += val.nelement() * val.element_size()
        return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        if self.bias is not None:
-            self.bias = self.bias.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.bias = self.bias.to(device=device, dtype=dtype)


 # TODO: find and debug lora/locon with bias
@@ -106,19 +100,14 @@ class LoRALayer(LoRALayerBase):
                model_size += val.nelement() * val.element_size()
        return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
-        super().to(device=device, dtype=dtype, non_blocking=non_blocking)
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)

-        self.up = self.up.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.down = self.down.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.up = self.up.to(device=device, dtype=dtype)
+        self.down = self.down.to(device=device, dtype=dtype)

        if self.mid is not None:
-            self.mid = self.mid.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.mid = self.mid.to(device=device, dtype=dtype)


 class LoHALayer(LoRALayerBase):
@@ -167,23 +156,18 @@ class LoHALayer(LoRALayerBase):
                model_size += val.nelement() * val.element_size()
        return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

-        self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.t1 is not None:
-            self.t1 = self.t1.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t1 = self.t1.to(device=device, dtype=dtype)

-        self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t2 = self.t2.to(device=device, dtype=dtype)


 class LoKRLayer(LoRALayerBase):
@@ -264,12 +248,7 @@ class LoKRLayer(LoRALayerBase):
                model_size += val.nelement() * val.element_size()
        return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        if self.w1 is not None:
@@ -277,19 +256,19 @@ class LoKRLayer(LoRALayerBase):
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
-            self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-            self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype)

        if self.w2 is not None:
-            self.w2 = self.w2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2 = self.w2.to(device=device, dtype=dtype)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
-            self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-            self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype)

        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t2 = self.t2.to(device=device, dtype=dtype)


 class FullLayer(LoRALayerBase):
@@ -319,15 +298,10 @@ class FullLayer(LoRALayerBase):
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

-        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.weight = self.weight.to(device=device, dtype=dtype)


 class IA3Layer(LoRALayerBase):
@@ -359,16 +333,11 @@ class IA3Layer(LoRALayerBase):
        model_size += self.on_input.nelement() * self.on_input.element_size()
        return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ):
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        super().to(device=device, dtype=dtype)

-        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.on_input = self.on_input.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.weight = self.weight.to(device=device, dtype=dtype)
+        self.on_input = self.on_input.to(device=device, dtype=dtype)


 AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
@@ -390,15 +359,10 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
    def name(self) -> str:
        return self._name

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        # TODO: try revert if exception?
        for _key, layer in self.layers.items():
-            layer.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            layer.to(device=device, dtype=dtype)

    def calc_size(self) -> int:
        model_size = 0
@@ -521,7 +485,7 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
            # lower memory consumption by removing already parsed layer values
            state_dict[layer_key].clear()

-            layer.to(device=device, dtype=dtype, non_blocking=TorchDevice.get_non_blocking(device))
+            layer.to(device=device, dtype=dtype)
            model.layers[layer_key] = layer

        return model
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -67,6 +67,7 @@ class ModelType(str, Enum):
    IPAdapter = "ip_adapter"
    CLIPVision = "clip_vision"
    T2IAdapter = "t2i_adapter"
+    SpandrelImageToImage = "spandrel_image_to_image"


 class SubModelType(str, Enum):
@@ -353,7 +354,7 @@ class CLIPVisionDiffusersConfig(DiffusersConfigBase):
    """Model config for CLIPVision."""

    type: Literal[ModelType.CLIPVision] = ModelType.CLIPVision
-    format: Literal[ModelFormat.Diffusers]
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers

    @staticmethod
    def get_tag() -> Tag:
@@ -364,13 +365,24 @@ class T2IAdapterConfig(DiffusersConfigBase, ControlAdapterConfigBase):
    """Model config for T2I."""

    type: Literal[ModelType.T2IAdapter] = ModelType.T2IAdapter
-    format: Literal[ModelFormat.Diffusers]
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers

    @staticmethod
    def get_tag() -> Tag:
        return Tag(f"{ModelType.T2IAdapter.value}.{ModelFormat.Diffusers.value}")


+class SpandrelImageToImageConfig(ModelConfigBase):
+    """Model config for Spandrel Image to Image models."""
+
+    type: Literal[ModelType.SpandrelImageToImage] = ModelType.SpandrelImageToImage
+    format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint
+
+    @staticmethod
+    def get_tag() -> Tag:
+        return Tag(f"{ModelType.SpandrelImageToImage.value}.{ModelFormat.Checkpoint.value}")
+
+
 def get_model_discriminator_value(v: Any) -> str:
    """
    Computes the discriminator value for a model config.
@@ -407,6 +419,7 @@ AnyModelConfig = Annotated[
        Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()],
        Annotated[IPAdapterCheckpointConfig, IPAdapterCheckpointConfig.get_tag()],
        Annotated[T2IAdapterConfig, T2IAdapterConfig.get_tag()],
+        Annotated[SpandrelImageToImageConfig, SpandrelImageToImageConfig.get_tag()],
        Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()],
    ],
    Discriminator(get_model_discriminator_value),
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -167,7 +167,8 @@ class ModelCache(ModelCacheBase[AnyModel]):
        size = calc_model_size_by_data(self.logger, model)
        self.make_room(size)

-        state_dict = model.state_dict() if isinstance(model, torch.nn.Module) else None
+        running_on_cpu = self.execution_device == torch.device("cpu")
+        state_dict = model.state_dict() if isinstance(model, torch.nn.Module) and not running_on_cpu else None
        cache_record = CacheRecord(key=key, model=model, device=self.storage_device, state_dict=state_dict, size=size)
        self._cached_models[key] = cache_record
        self._cache_stack.append(key)
@@ -289,11 +290,9 @@ class ModelCache(ModelCacheBase[AnyModel]):
                else:
                    new_dict: Dict[str, torch.Tensor] = {}
                    for k, v in cache_entry.state_dict.items():
-                        new_dict[k] = v.to(
-                            target_device, copy=True, non_blocking=TorchDevice.get_non_blocking(target_device)
-                        )
+                        new_dict[k] = v.to(target_device, copy=True)
                    cache_entry.model.load_state_dict(new_dict, assign=True)
-            cache_entry.model.to(target_device, non_blocking=TorchDevice.get_non_blocking(target_device))
+            cache_entry.model.to(target_device)
            cache_entry.device = target_device
        except Exception as e:  # blow away cache entry
            self._delete_cache_entry(cache_entry)
--- a/invokeai/backend/model_manager/load/model_loaders/spandrel_image_to_image.py
+++ b/invokeai/backend/model_manager/load/model_loaders/spandrel_image_to_image.py
@@ -0,0 +1,45 @@
+from pathlib import Path
+from typing import Optional
+
+import torch
+
+from invokeai.backend.model_manager.config import (
+    AnyModel,
+    AnyModelConfig,
+    BaseModelType,
+    ModelFormat,
+    ModelType,
+    SubModelType,
+)
+from invokeai.backend.model_manager.load.load_default import ModelLoader
+from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
+from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
+
+
+@ModelLoaderRegistry.register(
+    base=BaseModelType.Any, type=ModelType.SpandrelImageToImage, format=ModelFormat.Checkpoint
+)
+class SpandrelImageToImageModelLoader(ModelLoader):
+    """Class for loading Spandrel Image-to-Image models (i.e. models wrapped by spandrel.ImageModelDescriptor)."""
+
+    def _load_model(
+        self,
+        config: AnyModelConfig,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> AnyModel:
+        if submodel_type is not None:
+            raise ValueError("Unexpected submodel requested for Spandrel model.")
+
+        model_path = Path(config.path)
+        model = SpandrelImageToImageModel.load_from_file(model_path)
+
+        torch_dtype = self._torch_dtype
+        if not model.supports_dtype(torch_dtype):
+            self._logger.warning(
+                f"The configured dtype ('{self._torch_dtype}') is not supported by the {model.get_model_type_name()} "
+                "model. Falling back to 'float32'."
+            )
+            torch_dtype = torch.float32
+        model.to(dtype=torch_dtype)
+
+        return model
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -98,6 +98,9 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                ModelVariantType.Normal: StableDiffusionXLPipeline,
                ModelVariantType.Inpaint: StableDiffusionXLInpaintPipeline,
            },
+            BaseModelType.StableDiffusionXLRefiner: {
+                ModelVariantType.Normal: StableDiffusionXLPipeline,
+            },
        }
        assert isinstance(config, MainCheckpointConfig)
        try:
--- a/invokeai/backend/model_manager/load/model_util.py
+++ b/invokeai/backend/model_manager/load/model_util.py
@@ -11,10 +11,13 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from transformers import CLIPTokenizer

+from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
+from invokeai.backend.image_util.segment_anything.segment_anything_model import SegmentAnythingModel
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager.config import AnyModel
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
+from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
 from invokeai.backend.textual_inversion import TextualInversionModelRaw


@@ -33,7 +36,17 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
    elif isinstance(model, CLIPTokenizer):
        # TODO(ryand): Accurately calculate the tokenizer's size. It's small enough that it shouldn't matter for now.
        return 0
-    elif isinstance(model, (TextualInversionModelRaw, IPAdapter, LoRAModelRaw)):
+    elif isinstance(
+        model,
+        (
+            TextualInversionModelRaw,
+            IPAdapter,
+            LoRAModelRaw,
+            SpandrelImageToImageModel,
+            GroundingDinoPipeline,
+            SegmentAnythingModel,
+        ),
+    ):
        return model.calc_size()
    else:
        # TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any, Dict, Literal, Optional, Union

 import safetensors.torch
+import spandrel
 import torch
 from picklescan.scanner import scan_file_path

@@ -25,6 +26,7 @@ from invokeai.backend.model_manager.config import (
    SchedulerPredictionType,
 )
 from invokeai.backend.model_manager.util.model_util import lora_token_vector_length, read_checkpoint_meta
+from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
 from invokeai.backend.util.silence_warnings import SilenceWarnings

 CkptType = Dict[str | int, Any]
@@ -220,24 +222,46 @@ class ModelProbe(object):
        ckpt = ckpt.get("state_dict", ckpt)

        for key in [str(k) for k in ckpt.keys()]:
-            if any(key.startswith(v) for v in {"cond_stage_model.", "first_stage_model.", "model.diffusion_model."}):
+            if key.startswith(("cond_stage_model.", "first_stage_model.", "model.diffusion_model.")):
                return ModelType.Main
-            elif any(key.startswith(v) for v in {"encoder.conv_in", "decoder.conv_in"}):
+            elif key.startswith(("encoder.conv_in", "decoder.conv_in")):
                return ModelType.VAE
-            elif any(key.startswith(v) for v in {"lora_te_", "lora_unet_"}):
+            elif key.startswith(("lora_te_", "lora_unet_")):
                return ModelType.LoRA
-            elif any(key.endswith(v) for v in {"to_k_lora.up.weight", "to_q_lora.down.weight"}):
+            elif key.endswith(("to_k_lora.up.weight", "to_q_lora.down.weight")):
                return ModelType.LoRA
-            elif any(key.startswith(v) for v in {"controlnet", "control_model", "input_blocks"}):
+            elif key.startswith(("controlnet", "control_model", "input_blocks")):
                return ModelType.ControlNet
-            elif any(key.startswith(v) for v in {"image_proj.", "ip_adapter."}):
+            elif key.startswith(("image_proj.", "ip_adapter.")):
                return ModelType.IPAdapter
            elif key in {"emb_params", "string_to_param"}:
                return ModelType.TextualInversion
-        else:
-            # diffusers-ti
-            if len(ckpt) < 10 and all(isinstance(v, torch.Tensor) for v in ckpt.values()):
-                return ModelType.TextualInversion
+
+        # diffusers-ti
+        if len(ckpt) < 10 and all(isinstance(v, torch.Tensor) for v in ckpt.values()):
+            return ModelType.TextualInversion
+
+        # Check if the model can be loaded as a SpandrelImageToImageModel.
+        # This check is intentionally performed last, as it can be expensive (it requires loading the model from disk).
+        try:
+            # It would be nice to avoid having to load the Spandrel model from disk here. A couple of options were
+            # explored to avoid this:
+            # 1. Call `SpandrelImageToImageModel.load_from_state_dict(ckpt)`, where `ckpt` is a state_dict on the meta
+            #    device. Unfortunately, some Spandrel models perform operations during initialization that are not
+            #    supported on meta tensors.
+            # 2. Spandrel has internal logic to determine a model's type from its state_dict before loading the model.
+            #    This logic is not exposed in spandrel's public API. We could copy the logic here, but then we have to
+            #    maintain it, and the risk of false positive detections is higher.
+            SpandrelImageToImageModel.load_from_file(model_path)
+            return ModelType.SpandrelImageToImage
+        except spandrel.UnsupportedModelError:
+            pass
+        except RuntimeError as e:
+            if "No such file or directory" in str(e):
+                # This error is expected if the model_path does not exist (which is the case in some unit tests).
+                pass
+            else:
+                raise e

        raise InvalidModelConfigException(f"Unable to determine model type for {model_path}")

@@ -569,6 +593,11 @@ class T2IAdapterCheckpointProbe(CheckpointProbeBase):
        raise NotImplementedError()


+class SpandrelImageToImageCheckpointProbe(CheckpointProbeBase):
+    def get_base_type(self) -> BaseModelType:
+        return BaseModelType.Any
+
+
 ########################################################
 # classes for probing folders
 #######################################################
@@ -776,6 +805,11 @@ class CLIPVisionFolderProbe(FolderProbeBase):
        return BaseModelType.Any


+class SpandrelImageToImageFolderProbe(FolderProbeBase):
+    def get_base_type(self) -> BaseModelType:
+        raise NotImplementedError()
+
+
 class T2IAdapterFolderProbe(FolderProbeBase):
    def get_base_type(self) -> BaseModelType:
        config_file = self.model_path / "config.json"
@@ -805,6 +839,7 @@ ModelProbe.register_probe("diffusers", ModelType.ControlNet, ControlNetFolderPro
 ModelProbe.register_probe("diffusers", ModelType.IPAdapter, IPAdapterFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.CLIPVision, CLIPVisionFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.T2IAdapter, T2IAdapterFolderProbe)
+ModelProbe.register_probe("diffusers", ModelType.SpandrelImageToImage, SpandrelImageToImageFolderProbe)

 ModelProbe.register_probe("checkpoint", ModelType.Main, PipelineCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.VAE, VaeCheckpointProbe)
@@ -814,5 +849,6 @@ ModelProbe.register_probe("checkpoint", ModelType.ControlNet, ControlNetCheckpoi
 ModelProbe.register_probe("checkpoint", ModelType.IPAdapter, IPAdapterCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.CLIPVision, CLIPVisionCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.T2IAdapter, T2IAdapterCheckpointProbe)
+ModelProbe.register_probe("checkpoint", ModelType.SpandrelImageToImage, SpandrelImageToImageCheckpointProbe)

 ModelProbe.register_probe("onnx", ModelType.ONNX, ONNXFolderProbe)
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -187,157 +187,171 @@ STARTER_MODELS: list[StarterModel] = [
    # endregion
    # region ControlNet
    StarterModel(
-        name="QRCode Monster",
+        name="QRCode Monster v2 (SD1.5)",
        base=BaseModelType.StableDiffusion1,
-        source="monster-labs/control_v1p_sd15_qrcode_monster",
-        description="Controlnet model that generates scannable creative QR codes",
+        source="monster-labs/control_v1p_sd15_qrcode_monster::v2",
+        description="ControlNet model that generates scannable creative QR codes",
+        type=ModelType.ControlNet,
+    ),
+    StarterModel(
+        name="QRCode Monster (SDXL)",
+        base=BaseModelType.StableDiffusionXL,
+        source="monster-labs/control_v1p_sdxl_qrcode_monster",
+        description="ControlNet model that generates scannable creative QR codes",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="canny",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_canny",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning.",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="inpaint",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_inpaint",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning, inpaint version",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning, inpaint version",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="mlsd",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_mlsd",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning, MLSD version",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning, MLSD version",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11f1p_sd15_depth",
-        description="Controlnet weights trained on sd-1.5 with depth conditioning",
+        description="ControlNet weights trained on sd-1.5 with depth conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="normal_bae",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_normalbae",
-        description="Controlnet weights trained on sd-1.5 with normalbae image conditioning",
+        description="ControlNet weights trained on sd-1.5 with normalbae image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="seg",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_seg",
-        description="Controlnet weights trained on sd-1.5 with seg image conditioning",
+        description="ControlNet weights trained on sd-1.5 with seg image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="lineart",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_lineart",
-        description="Controlnet weights trained on sd-1.5 with lineart image conditioning",
+        description="ControlNet weights trained on sd-1.5 with lineart image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="lineart_anime",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15s2_lineart_anime",
-        description="Controlnet weights trained on sd-1.5 with anime image conditioning",
+        description="ControlNet weights trained on sd-1.5 with anime image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="openpose",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_openpose",
-        description="Controlnet weights trained on sd-1.5 with openpose image conditioning",
+        description="ControlNet weights trained on sd-1.5 with openpose image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="scribble",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_scribble",
-        description="Controlnet weights trained on sd-1.5 with scribble image conditioning",
+        description="ControlNet weights trained on sd-1.5 with scribble image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="softedge",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_softedge",
-        description="Controlnet weights trained on sd-1.5 with soft edge conditioning",
+        description="ControlNet weights trained on sd-1.5 with soft edge conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="shuffle",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11e_sd15_shuffle",
-        description="Controlnet weights trained on sd-1.5 with shuffle image conditioning",
+        description="ControlNet weights trained on sd-1.5 with shuffle image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="tile",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11f1e_sd15_tile",
-        description="Controlnet weights trained on sd-1.5 with tiled image conditioning",
+        description="ControlNet weights trained on sd-1.5 with tiled image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="ip2p",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11e_sd15_ip2p",
-        description="Controlnet weights trained on sd-1.5 with ip2p conditioning.",
+        description="ControlNet weights trained on sd-1.5 with ip2p conditioning.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="canny-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="xinsir/controlnet-canny-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
+        source="xinsir/controlNet-canny-sdxl-1.0",
+        description="ControlNet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="diffusers/controlnet-depth-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with depth conditioning.",
+        source="diffusers/controlNet-depth-sdxl-1.0",
+        description="ControlNet weights trained on sdxl-1.0 with depth conditioning.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="softedge-dexined-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="SargeZT/controlnet-sd-xl-1.0-softedge-dexined",
-        description="Controlnet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
+        source="SargeZT/controlNet-sd-xl-1.0-softedge-dexined",
+        description="ControlNet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth-16bit-zoe-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="SargeZT/controlnet-sd-xl-1.0-depth-16bit-zoe",
-        description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
+        source="SargeZT/controlNet-sd-xl-1.0-depth-16bit-zoe",
+        description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth-zoe-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="diffusers/controlnet-zoe-depth-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
+        source="diffusers/controlNet-zoe-depth-sdxl-1.0",
+        description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="openpose-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="xinsir/controlnet-openpose-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
+        source="xinsir/controlNet-openpose-sdxl-1.0",
+        description="ControlNet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="scribble-sdxl",
        base=BaseModelType.StableDiffusionXL,
-        source="xinsir/controlnet-scribble-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
+        source="xinsir/controlNet-scribble-sdxl-1.0",
+        description="ControlNet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
+        type=ModelType.ControlNet,
+    ),
+    StarterModel(
+        name="tile-sdxl",
+        base=BaseModelType.StableDiffusionXL,
+        source="xinsir/controlNet-tile-sdxl-1.0",
+        description="ControlNet weights trained on sdxl-1.0 with tiled image conditioning",
        type=ModelType.ControlNet,
    ),
    # endregion
@@ -399,6 +413,43 @@ STARTER_MODELS: list[StarterModel] = [
        type=ModelType.T2IAdapter,
    ),
    # endregion
+    # region SpandrelImageToImage
+    StarterModel(
+        name="RealESRGAN_x4plus_anime_6B",
+        base=BaseModelType.Any,
+        source="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
+        description="A Real-ESRGAN 4x upscaling model (optimized for anime images).",
+        type=ModelType.SpandrelImageToImage,
+    ),
+    StarterModel(
+        name="RealESRGAN_x4plus",
+        base=BaseModelType.Any,
+        source="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
+        description="A Real-ESRGAN 4x upscaling model (general-purpose).",
+        type=ModelType.SpandrelImageToImage,
+    ),
+    StarterModel(
+        name="ESRGAN_SRx4_DF2KOST_official",
+        base=BaseModelType.Any,
+        source="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth",
+        description="The official ESRGAN 4x upscaling model.",
+        type=ModelType.SpandrelImageToImage,
+    ),
+    StarterModel(
+        name="RealESRGAN_x2plus",
+        base=BaseModelType.Any,
+        source="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
+        description="A Real-ESRGAN 2x upscaling model (general-purpose).",
+        type=ModelType.SpandrelImageToImage,
+    ),
+    StarterModel(
+        name="SwinIR - realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN",
+        base=BaseModelType.Any,
+        source="https://github.com/JingyunLiang/SwinIR/releases/download/v0.0/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN-with-dict-keys-params-and-params_ema.pth",
+        description="A SwinIR 4x upscaling model.",
+        type=ModelType.SpandrelImageToImage,
+    ),
+    # endregion
 ]

 assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models"
--- a/invokeai/backend/model_patcher.py
+++ b/invokeai/backend/model_patcher.py
@@ -5,7 +5,7 @@ from __future__ import annotations

 import pickle
 from contextlib import contextmanager
-from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
+from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Type, Union

 import numpy as np
 import torch
@@ -32,8 +32,27 @@ with LoRAHelper.apply_lora_unet(unet, loras):
 """


-# TODO: rename smth like ModelPatcher and add TI method?
 class ModelPatcher:
+    @staticmethod
+    @contextmanager
+    def patch_unet_attention_processor(unet: UNet2DConditionModel, processor_cls: Type[Any]):
+        """A context manager that patches `unet` with the provided attention processor.
+
+        Args:
+            unet (UNet2DConditionModel): The UNet model to patch.
+            processor_cls (Type[Any]): Class which will be instantiated for each key and passed to set_attn_processor(...).
+        """
+        unet_orig_processors = unet.attn_processors
+
+        # Create a separate instance for each attention, to be able to modify each attention separately.
+        unet_new_processors = {key: processor_cls() for key in unet_orig_processors.keys()}
+        try:
+            unet.set_attn_processor(unet_new_processors)
+            yield None
+
+        finally:
+            unet.set_attn_processor(unet_orig_processors)
+
    @staticmethod
    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
        assert "." not in lora_key
@@ -139,15 +158,12 @@ class ModelPatcher:
                        # We intentionally move to the target device first, then cast. Experimentally, this was found to
                        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                        # same thing in a single call to '.to(...)'.
-                        layer.to(device=device, non_blocking=TorchDevice.get_non_blocking(device))
-                        layer.to(dtype=torch.float32, non_blocking=TorchDevice.get_non_blocking(device))
+                        layer.to(device=device)
+                        layer.to(dtype=torch.float32)
                        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                        layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
-                        layer.to(
-                            device=TorchDevice.CPU_DEVICE,
-                            non_blocking=TorchDevice.get_non_blocking(TorchDevice.CPU_DEVICE),
-                        )
+                        layer.to(device=TorchDevice.CPU_DEVICE)

                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                        if module.weight.shape != layer_weight.shape:
@@ -156,7 +172,7 @@ class ModelPatcher:
                            layer_weight = layer_weight.reshape(module.weight.shape)

                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
-                        module.weight += layer_weight.to(dtype=dtype, non_blocking=TorchDevice.get_non_blocking(device))
+                        module.weight += layer_weight.to(dtype=dtype)

            yield  # wait for context manager exit

@@ -164,9 +180,7 @@ class ModelPatcher:
            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
            with torch.no_grad():
                for module_key, weight in original_weights.items():
-                    model.get_submodule(module_key).weight.copy_(
-                        weight, non_blocking=TorchDevice.get_non_blocking(weight.device)
-                    )
+                    model.get_submodule(module_key).weight.copy_(weight)

    @classmethod
    @contextmanager
--- a/invokeai/backend/onnx/onnx_runtime.py
+++ b/invokeai/backend/onnx/onnx_runtime.py
@@ -190,12 +190,7 @@ class IAIOnnxRuntimeModel(RawModel):
        return self.session.run(None, inputs)

    # compatability with RawModel ABC
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        pass

    # compatability with diffusers load code
--- a/invokeai/backend/raw_model.py
+++ b/invokeai/backend/raw_model.py
@@ -1,15 +1,3 @@
-"""Base class for 'Raw' models.
-
-The RawModel class is the base class of LoRAModelRaw and TextualInversionModelRaw,
-and is used for type checking of calls to the model patcher. Its main purpose
-is to avoid a circular import issues when lora.py tries to import BaseModelType
-from invokeai.backend.model_manager.config, and the latter tries to import LoRAModelRaw
-from lora.py.
-
-The term 'raw' was introduced to describe a wrapper around a torch.nn.Module
-that adds additional methods and attributes.
-"""
-
 from abc import ABC, abstractmethod
 from typing import Optional

@@ -17,13 +5,18 @@ import torch


 class RawModel(ABC):
-    """Abstract base class for 'Raw' model wrappers."""
+    """Base class for 'Raw' models.
+
+    The RawModel class is the base class of LoRAModelRaw, TextualInversionModelRaw, etc.
+    and is used for type checking of calls to the model patcher. Its main purpose
+    is to avoid circular import issues when lora.py tries to import BaseModelType
+    from invokeai.backend.model_manager.config, and the latter tries to import LoRAModelRaw
+    from lora.py.
+
+    The term 'raw' was introduced to describe a wrapper around a torch.nn.Module
+    that adds additional methods and attributes.
+    """

    @abstractmethod
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        pass
--- a/invokeai/backend/spandrel_image_to_image_model.py
+++ b/invokeai/backend/spandrel_image_to_image_model.py
@@ -0,0 +1,139 @@
+from pathlib import Path
+from typing import Any, Optional
+
+import numpy as np
+import torch
+from PIL import Image
+from spandrel import ImageModelDescriptor, ModelLoader
+
+from invokeai.backend.raw_model import RawModel
+
+
+class SpandrelImageToImageModel(RawModel):
+    """A wrapper for a Spandrel Image-to-Image model.
+
+    The main reason for having a wrapper class is to integrate with the type handling of RawModel.
+    """
+
+    def __init__(self, spandrel_model: ImageModelDescriptor[Any]):
+        self._spandrel_model = spandrel_model
+
+    @staticmethod
+    def pil_to_tensor(image: Image.Image) -> torch.Tensor:
+        """Convert PIL Image to the torch.Tensor format expected by SpandrelImageToImageModel.run().
+
+        Args:
+            image (Image.Image): A PIL Image with shape (H, W, C) and values in the range [0, 255].
+
+        Returns:
+            torch.Tensor: A torch.Tensor with shape (N, C, H, W) and values in the range [0, 1].
+        """
+        image_np = np.array(image)
+        # (H, W, C) -> (C, H, W)
+        image_np = np.transpose(image_np, (2, 0, 1))
+        image_np = image_np / 255
+        image_tensor = torch.from_numpy(image_np).float()
+        # (C, H, W) -> (N, C, H, W)
+        image_tensor = image_tensor.unsqueeze(0)
+        return image_tensor
+
+    @staticmethod
+    def tensor_to_pil(tensor: torch.Tensor) -> Image.Image:
+        """Convert a torch.Tensor produced by SpandrelImageToImageModel.run() to a PIL Image.
+
+        Args:
+            tensor (torch.Tensor): A torch.Tensor with shape (N, C, H, W) and values in the range [0, 1].
+
+        Returns:
+            Image.Image: A PIL Image with shape (H, W, C) and values in the range [0, 255].
+        """
+        # (N, C, H, W) -> (C, H, W)
+        tensor = tensor.squeeze(0)
+        # (C, H, W) -> (H, W, C)
+        tensor = tensor.permute(1, 2, 0)
+        tensor = tensor.clamp(0, 1)
+        tensor = (tensor * 255).cpu().detach().numpy().astype(np.uint8)
+        image = Image.fromarray(tensor)
+        return image
+
+    def run(self, image_tensor: torch.Tensor) -> torch.Tensor:
+        """Run the image-to-image model.
+
+        Args:
+            image_tensor (torch.Tensor): A torch.Tensor with shape (N, C, H, W) and values in the range [0, 1].
+        """
+        return self._spandrel_model(image_tensor)
+
+    @classmethod
+    def load_from_file(cls, file_path: str | Path):
+        model = ModelLoader().load_from_file(file_path)
+        if not isinstance(model, ImageModelDescriptor):
+            raise ValueError(
+                f"Loaded a spandrel model of type '{type(model)}'. Only image-to-image models are supported "
+                "('ImageModelDescriptor')."
+            )
+
+        return cls(spandrel_model=model)
+
+    @classmethod
+    def load_from_state_dict(cls, state_dict: dict[str, torch.Tensor]):
+        model = ModelLoader().load_from_state_dict(state_dict)
+        if not isinstance(model, ImageModelDescriptor):
+            raise ValueError(
+                f"Loaded a spandrel model of type '{type(model)}'. Only image-to-image models are supported "
+                "('ImageModelDescriptor')."
+            )
+
+        return cls(spandrel_model=model)
+
+    def supports_dtype(self, dtype: torch.dtype) -> bool:
+        """Check if the model supports the given dtype."""
+        if dtype == torch.float16:
+            return self._spandrel_model.supports_half
+        elif dtype == torch.bfloat16:
+            return self._spandrel_model.supports_bfloat16
+        elif dtype == torch.float32:
+            # All models support float32.
+            return True
+        else:
+            raise ValueError(f"Unexpected dtype '{dtype}'.")
+
+    def get_model_type_name(self) -> str:
+        """The model type name. Intended for logging / debugging purposes. Do not rely on this field remaining
+        consistent over time.
+        """
+        return str(type(self._spandrel_model.model))
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        non_blocking: bool = False,
+    ) -> None:
+        """Note: Some models have limited dtype support. Call supports_dtype(...) to check if the dtype is supported.
+        Note: The non_blocking parameter is currently ignored."""
+        # TODO(ryand): spandrel.ImageModelDescriptor.to(...) does not support non_blocking. We will have to access the
+        # model directly if we want to apply this optimization.
+        self._spandrel_model.to(device=device, dtype=dtype)
+
+    @property
+    def device(self) -> torch.device:
+        """The device of the underlying model."""
+        return self._spandrel_model.device
+
+    @property
+    def dtype(self) -> torch.dtype:
+        """The dtype of the underlying model."""
+        return self._spandrel_model.dtype
+
+    @property
+    def scale(self) -> int:
+        """The scale of the model (e.g. 1x, 2x, 4x, etc.)."""
+        return self._spandrel_model.scale
+
+    def calc_size(self) -> int:
+        """Get size of the model in memory in bytes."""
+        # HACK(ryand): Fix this issue with circular imports.
+        from invokeai.backend.model_manager.load.model_util import calc_module_size
+
+        return calc_module_size(self._spandrel_model.model)
--- a/invokeai/backend/stable_diffusion/init.py
+++ b/invokeai/backend/stable_diffusion/init.py
@@ -7,11 +7,9 @@ from invokeai.backend.stable_diffusion.diffusers_pipeline import (  # noqa: F401
    StableDiffusionGeneratorPipeline,
 )
 from invokeai.backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent  # noqa: F401
-from invokeai.backend.stable_diffusion.seamless import set_seamless  # noqa: F401

 __all__ = [
    "PipelineIntermediateState",
    "StableDiffusionGeneratorPipeline",
    "InvokeAIDiffuserComponent",
-    "set_seamless",
 ]
--- a/invokeai/backend/stable_diffusion/denoise_context.py
+++ b/invokeai/backend/stable_diffusion/denoise_context.py
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type, Union
+
+import torch
+from diffusers import UNet2DConditionModel
+from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningMode, TextConditioningData
+
+
+@dataclass
+class UNetKwargs:
+    sample: torch.Tensor
+    timestep: Union[torch.Tensor, float, int]
+    encoder_hidden_states: torch.Tensor
+
+    class_labels: Optional[torch.Tensor] = None
+    timestep_cond: Optional[torch.Tensor] = None
+    attention_mask: Optional[torch.Tensor] = None
+    cross_attention_kwargs: Optional[Dict[str, Any]] = None
+    added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None
+    down_block_additional_residuals: Optional[Tuple[torch.Tensor]] = None
+    mid_block_additional_residual: Optional[torch.Tensor] = None
+    down_intrablock_additional_residuals: Optional[Tuple[torch.Tensor]] = None
+    encoder_attention_mask: Optional[torch.Tensor] = None
+    # return_dict: bool = True
+
+
+@dataclass
+class DenoiseInputs:
+    """Initial variables passed to denoise. Supposed to be unchanged."""
+
+    # The latent-space image to denoise.
+    # Shape: [batch, channels, latent_height, latent_width]
+    # - If we are inpainting, this is the initial latent image before noise has been added.
+    # - If we are generating a new image, this should be initialized to zeros.
+    # - In some cases, this may be a partially-noised latent image (e.g. when running the SDXL refiner).
+    orig_latents: torch.Tensor
+
+    # kwargs forwarded to the scheduler.step() method.
+    scheduler_step_kwargs: dict[str, Any]
+
+    # Text conditioning data.
+    conditioning_data: TextConditioningData
+
+    # Noise used for two purposes:
+    # 1. Used by the scheduler to noise the initial `latents` before denoising.
+    # 2. Used to noise the `masked_latents` when inpainting.
+    # `noise` should be None if the `latents` tensor has already been noised.
+    # Shape: [1 or batch, channels, latent_height, latent_width]
+    noise: Optional[torch.Tensor]
+
+    # The seed used to generate the noise for the denoising process.
+    # HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
+    # same noise used earlier in the pipeline. This should really be handled in a clearer way.
+    seed: int
+
+    # The timestep schedule for the denoising process.
+    timesteps: torch.Tensor
+
+    # The first timestep in the schedule. This is used to determine the initial noise level, so
+    # should be populated if you want noise applied *even* if timesteps is empty.
+    init_timestep: torch.Tensor
+
+    # Class of attention processor that is used.
+    attention_processor_cls: Type[Any]
+
+
+@dataclass
+class DenoiseContext:
+    """Context with all variables in denoise"""
+
+    # Initial variables passed to denoise. Supposed to be unchanged.
+    inputs: DenoiseInputs
+
+    # Scheduler which is used to apply noise predictions.
+    scheduler: SchedulerMixin
+
+    # UNet model.
+    unet: Optional[UNet2DConditionModel] = None
+
+    # Current state of latent-space image in denoising process.
+    # None until `PRE_DENOISE_LOOP` callback.
+    # Shape: [batch, channels, latent_height, latent_width]
+    latents: Optional[torch.Tensor] = None
+
+    # Current denoising step index.
+    # None until `PRE_STEP` callback.
+    step_index: Optional[int] = None
+
+    # Current denoising step timestep.
+    # None until `PRE_STEP` callback.
+    timestep: Optional[torch.Tensor] = None
+
+    # Arguments which will be passed to UNet model.
+    # Available in `PRE_UNET`/`POST_UNET` callbacks, otherwise will be None.
+    unet_kwargs: Optional[UNetKwargs] = None
+
+    # SchedulerOutput returned from the step function (normally generated by the scheduler).
+    # Supposed to be used only in `POST_STEP` callback, otherwise can be None.
+    step_output: Optional[SchedulerOutput] = None
+
+    # Scaled version of `latents`, which will be passed to unet_kwargs initialization.
+    # Available in events inside a step (between `PRE_STEP` and `POST_STEP`).
+    # Shape: [batch, channels, latent_height, latent_width]
+    latent_model_input: Optional[torch.Tensor] = None
+
+    # [TMP] Defines which conditionings the current UNet call will be run on.
+    # Available in `PRE_UNET`/`POST_UNET` callbacks, otherwise will be None.
+    conditioning_mode: Optional[ConditioningMode] = None
+
+    # [TMP] Noise predictions from negative conditioning.
+    # Available in `POST_COMBINE_NOISE_PREDS` callback, otherwise will be None.
+    # Shape: [batch, channels, latent_height, latent_width]
+    negative_noise_pred: Optional[torch.Tensor] = None
+
+    # [TMP] Noise predictions from positive conditioning.
+    # Available in `POST_COMBINE_NOISE_PREDS` callback, otherwise will be None.
+    # Shape: [batch, channels, latent_height, latent_width]
+    positive_noise_pred: Optional[torch.Tensor] = None
+
+    # Combined noise prediction from passed conditionings.
+    # Available in `POST_COMBINE_NOISE_PREDS` callback, otherwise will be None.
+    # Shape: [batch, channels, latent_height, latent_width]
+    noise_pred: Optional[torch.Tensor] = None
+
+    # Dictionary for extensions to pass extra info about denoise process to other extensions.
+    extra: dict = field(default_factory=dict)
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -23,21 +23,12 @@ from invokeai.app.services.config.config_default import get_config
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import IPAdapterData, TextConditioningData
 from invokeai.backend.stable_diffusion.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent
 from invokeai.backend.stable_diffusion.diffusion.unet_attention_patcher import UNetAttentionPatcher, UNetIPAdapterData
+from invokeai.backend.stable_diffusion.extensions.preview import PipelineIntermediateState
 from invokeai.backend.util.attention import auto_detect_slice_size
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.hotfixes import ControlNetModel


-@dataclass
-class PipelineIntermediateState:
-    step: int
-    order: int
-    total_steps: int
-    timestep: int
-    latents: torch.Tensor
-    predicted_original: Optional[torch.Tensor] = None
-
-
@dataclass
 class AddsMaskGuidance:
    mask: torch.Tensor
--- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py
+++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py
@@ -1,10 +1,17 @@
+from __future__ import annotations
+
 import math
 from dataclasses import dataclass
-from typing import List, Optional, Union
+from enum import Enum
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union

 import torch

-from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
+from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData
+
+if TYPE_CHECKING:
+    from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
+    from invokeai.backend.stable_diffusion.denoise_context import UNetKwargs


@dataclass
@@ -95,6 +102,12 @@ class TextConditioningRegions:
        assert self.masks.shape[1] == len(self.ranges)


+class ConditioningMode(Enum):
+    Both = "both"
+    Negative = "negative"
+    Positive = "positive"
+
+
 class TextConditioningData:
    def __init__(
        self,
@@ -103,7 +116,7 @@ class TextConditioningData:
        uncond_regions: Optional[TextConditioningRegions],
        cond_regions: Optional[TextConditioningRegions],
        guidance_scale: Union[float, List[float]],
-        guidance_rescale_multiplier: float = 0,
+        guidance_rescale_multiplier: float = 0,  # TODO: old backend, remove
    ):
        self.uncond_text = uncond_text
        self.cond_text = cond_text
@@ -114,6 +127,7 @@ class TextConditioningData:
        # Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
        # images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
        self.guidance_scale = guidance_scale
+        # TODO: old backend, remove
        # For models trained using zero-terminal SNR ("ztsnr"), it's suggested to use guidance_rescale_multiplier of 0.7.
        # See [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
        self.guidance_rescale_multiplier = guidance_rescale_multiplier
@@ -121,3 +135,114 @@ class TextConditioningData:
    def is_sdxl(self):
        assert isinstance(self.uncond_text, SDXLConditioningInfo) == isinstance(self.cond_text, SDXLConditioningInfo)
        return isinstance(self.cond_text, SDXLConditioningInfo)
+
+    def to_unet_kwargs(self, unet_kwargs: UNetKwargs, conditioning_mode: ConditioningMode):
+        """Fills unet arguments with data from provided conditionings.
+
+        Args:
+            unet_kwargs (UNetKwargs): Object which stores UNet model arguments.
+            conditioning_mode (ConditioningMode): Describes which conditionings should be used.
+        """
+        _, _, h, w = unet_kwargs.sample.shape
+        device = unet_kwargs.sample.device
+        dtype = unet_kwargs.sample.dtype
+
+        # TODO: combine regions with conditionings
+        if conditioning_mode == ConditioningMode.Both:
+            conditionings = [self.uncond_text, self.cond_text]
+            c_regions = [self.uncond_regions, self.cond_regions]
+        elif conditioning_mode == ConditioningMode.Positive:
+            conditionings = [self.cond_text]
+            c_regions = [self.cond_regions]
+        elif conditioning_mode == ConditioningMode.Negative:
+            conditionings = [self.uncond_text]
+            c_regions = [self.uncond_regions]
+        else:
+            raise ValueError(f"Unexpected conditioning mode: {conditioning_mode}")
+
+        encoder_hidden_states, encoder_attention_mask = self._concat_conditionings_for_batch(
+            [c.embeds for c in conditionings]
+        )
+
+        unet_kwargs.encoder_hidden_states = encoder_hidden_states
+        unet_kwargs.encoder_attention_mask = encoder_attention_mask
+
+        if self.is_sdxl():
+            added_cond_kwargs = dict(  # noqa: C408
+                text_embeds=torch.cat([c.pooled_embeds for c in conditionings]),
+                time_ids=torch.cat([c.add_time_ids for c in conditionings]),
+            )
+
+            unet_kwargs.added_cond_kwargs = added_cond_kwargs
+
+        if any(r is not None for r in c_regions):
+            tmp_regions = []
+            for c, r in zip(conditionings, c_regions, strict=True):
+                if r is None:
+                    r = TextConditioningRegions(
+                        masks=torch.ones((1, 1, h, w), dtype=dtype),
+                        ranges=[Range(start=0, end=c.embeds.shape[1])],
+                    )
+                tmp_regions.append(r)
+
+            if unet_kwargs.cross_attention_kwargs is None:
+                unet_kwargs.cross_attention_kwargs = {}
+
+            unet_kwargs.cross_attention_kwargs.update(
+                regional_prompt_data=RegionalPromptData(regions=tmp_regions, device=device, dtype=dtype),
+            )
+
+    @staticmethod
+    def _pad_zeros(t: torch.Tensor, pad_shape: tuple, dim: int) -> torch.Tensor:
+        return torch.cat([t, torch.zeros(pad_shape, device=t.device, dtype=t.dtype)], dim=dim)
+
+    @classmethod
+    def _pad_conditioning(
+        cls,
+        cond: torch.Tensor,
+        target_len: int,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Zero-pad `cond` along dim 1 to `target_len`; return it with a mask (1 = original token, 0 = padding).
+
+        Args:
+            cond (torch.Tensor): 3-dimensional conditioning tensor to pad; dim 1 is the token axis.
+            target_len (int): Token count to pad to. If `cond` is already at least this long it is left as is.
+        """
+        conditioning_attention_mask = torch.ones((cond.shape[0], cond.shape[1]), device=cond.device, dtype=cond.dtype)
+
+        if cond.shape[1] < target_len:
+            conditioning_attention_mask = cls._pad_zeros(
+                conditioning_attention_mask,
+                pad_shape=(cond.shape[0], target_len - cond.shape[1]),
+                dim=1,
+            )
+
+            cond = cls._pad_zeros(
+                cond,
+                pad_shape=(cond.shape[0], target_len - cond.shape[1], cond.shape[2]),
+                dim=1,
+            )
+
+        return cond, conditioning_attention_mask
+
+    @classmethod
+    def _concat_conditionings_for_batch(
+        cls,
+        conditionings: List[torch.Tensor],
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """Concatenate the provided conditioning tensors into one batched tensor.
+        If their token lengths differ, zero-pad them to the longest one and also return an
+        encoder_attention_mask that excludes the padding from attention (None when no padding is needed).
+
+        Args:
+            conditionings (List[torch.Tensor]): Conditioning tensors to concatenate. The list is not modified.
+        """
+        encoder_attention_mask = None
+        max_len = max(c.shape[1] for c in conditionings)
+        if any(c.shape[1] != max_len for c in conditionings):
+            # Pad into new lists instead of writing back into the caller's list.
+            padded = [cls._pad_conditioning(c, max_len) for c in conditionings]
+            conditionings = [cond for cond, _ in padded]
+            encoder_attention_mask = torch.cat([mask for _, mask in padded])
+
+        return torch.cat(conditionings), encoder_attention_mask
--- a/invokeai/backend/stable_diffusion/diffusion/regional_prompt_data.py
+++ b/invokeai/backend/stable_diffusion/diffusion/regional_prompt_data.py
@@ -1,9 +1,14 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import torch
 import torch.nn.functional as F

-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
-    TextConditioningRegions,
-)
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
+        TextConditioningRegions,
+    )


 class RegionalPromptData:
--- a/invokeai/backend/stable_diffusion/diffusion_backend.py
+++ b/invokeai/backend/stable_diffusion/diffusion_backend.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+import torch
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput
+from tqdm.auto import tqdm
+
+from invokeai.app.services.config.config_default import get_config
+from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, UNetKwargs
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningMode
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
+
+
+class StableDiffusionBackend:
+    def __init__(
+        self,
+        unet: UNet2DConditionModel,
+        scheduler: SchedulerMixin,
+    ):
+        self.unet = unet
+        self.scheduler = scheduler
+        config = get_config()
+        self._sequential_guidance = config.sequential_guidance
+
+    def latents_from_embeddings(self, ctx: DenoiseContext, ext_manager: ExtensionsManager):
+        if ctx.inputs.init_timestep.shape[0] == 0:
+            return ctx.inputs.orig_latents
+
+        ctx.latents = ctx.inputs.orig_latents.clone()
+
+        if ctx.inputs.noise is not None:
+            batch_size = ctx.latents.shape[0]
+            # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
+            ctx.latents = ctx.scheduler.add_noise(
+                ctx.latents, ctx.inputs.noise, ctx.inputs.init_timestep.expand(batch_size)
+            )
+
+        # if no work to do, return latents
+        if ctx.inputs.timesteps.shape[0] == 0:
+            return ctx.latents
+
+        # ext: inpaint[pre_denoise_loop, priority=normal] (maybe init, but not sure if it needed)
+        # ext: preview[pre_denoise_loop, priority=low]
+        ext_manager.run_callback(ExtensionCallbackType.PRE_DENOISE_LOOP, ctx)
+
+        for ctx.step_index, ctx.timestep in enumerate(tqdm(ctx.inputs.timesteps)):  # noqa: B020
+            # ext: inpaint (apply mask to latents on non-inpaint models)
+            ext_manager.run_callback(ExtensionCallbackType.PRE_STEP, ctx)
+
+            # ext: tiles? [override: step]
+            ctx.step_output = self.step(ctx, ext_manager)
+
+            # ext: inpaint[post_step, priority=high] (apply mask to preview on non-inpaint models)
+            # ext: preview[post_step, priority=low]
+            ext_manager.run_callback(ExtensionCallbackType.POST_STEP, ctx)
+
+            ctx.latents = ctx.step_output.prev_sample
+
+        # ext: inpaint[post_denoise_loop] (restore unmasked part)
+        ext_manager.run_callback(ExtensionCallbackType.POST_DENOISE_LOOP, ctx)
+        return ctx.latents
+
+    @torch.inference_mode()
+    def step(self, ctx: DenoiseContext, ext_manager: ExtensionsManager) -> SchedulerOutput:
+        ctx.latent_model_input = ctx.scheduler.scale_model_input(ctx.latents, ctx.timestep)
+
+        # TODO: conditionings as list(conditioning_data.to_unet_kwargs - ready)
+        # Note: The current handling of conditioning doesn't feel very future-proof.
+        # This might change in the future as new requirements come up, but for now,
+        # this is the rough plan.
+        if self._sequential_guidance:
+            ctx.negative_noise_pred = self.run_unet(ctx, ext_manager, ConditioningMode.Negative)
+            ctx.positive_noise_pred = self.run_unet(ctx, ext_manager, ConditioningMode.Positive)
+        else:
+            both_noise_pred = self.run_unet(ctx, ext_manager, ConditioningMode.Both)
+            ctx.negative_noise_pred, ctx.positive_noise_pred = both_noise_pred.chunk(2)
+
+        # ext: override combine_noise_preds
+        ctx.noise_pred = self.combine_noise_preds(ctx)
+
+        # ext: cfg_rescale [modify_noise_prediction]
+        # TODO: rename
+        ext_manager.run_callback(ExtensionCallbackType.POST_COMBINE_NOISE_PREDS, ctx)
+
+        # compute the previous noisy sample x_t -> x_t-1
+        step_output = ctx.scheduler.step(ctx.noise_pred, ctx.timestep, ctx.latents, **ctx.inputs.scheduler_step_kwargs)
+
+        # clean up locals
+        ctx.latent_model_input = None
+        ctx.negative_noise_pred = None
+        ctx.positive_noise_pred = None
+        ctx.noise_pred = None
+
+        return step_output
+
+    @staticmethod
+    def combine_noise_preds(ctx: DenoiseContext) -> torch.Tensor:
+        guidance_scale = ctx.inputs.conditioning_data.guidance_scale
+        if isinstance(guidance_scale, list):
+            guidance_scale = guidance_scale[ctx.step_index]
+
+        # Note: Although this `torch.lerp(...)` line is logically equivalent to the current CFG line, it seems to result
+        # in slightly different outputs. It is suspected that this is caused by small precision differences.
+        # return torch.lerp(ctx.negative_noise_pred, ctx.positive_noise_pred, guidance_scale)
+        return ctx.negative_noise_pred + guidance_scale * (ctx.positive_noise_pred - ctx.negative_noise_pred)
+
+    def run_unet(self, ctx: DenoiseContext, ext_manager: ExtensionsManager, conditioning_mode: ConditioningMode):
+        sample = ctx.latent_model_input
+        if conditioning_mode == ConditioningMode.Both:
+            sample = torch.cat([sample] * 2)  # one copy of the batch per conditioning
+
+        ctx.unet_kwargs = UNetKwargs(
+            sample=sample,
+            timestep=ctx.timestep,
+            encoder_hidden_states=None,  # set later by conditioning
+            cross_attention_kwargs=dict(  # noqa: C408
+                percent_through=ctx.step_index / len(ctx.inputs.timesteps),
+            ),
+        )
+
+        ctx.conditioning_mode = conditioning_mode
+        ctx.inputs.conditioning_data.to_unet_kwargs(ctx.unet_kwargs, ctx.conditioning_mode)
+
+        # ext: controlnet/ip/t2i [pre_unet]
+        ext_manager.run_callback(ExtensionCallbackType.PRE_UNET, ctx)
+
+        # ext: inpaint [pre_unet, priority=low]
+        # or
+        # ext: inpaint [override: unet_forward]
+        noise_pred = self._unet_forward(**vars(ctx.unet_kwargs))
+
+        ext_manager.run_callback(ExtensionCallbackType.POST_UNET, ctx)
+
+        # reset the per-call context fields, which are only set around the unet call
+        ctx.unet_kwargs = None
+        ctx.conditioning_mode = None
+
+        return noise_pred
+
+    def _unet_forward(self, **kwargs) -> torch.Tensor:
+        return self.unet(**kwargs).sample
--- a/invokeai/backend/stable_diffusion/extension_callback_type.py
+++ b/invokeai/backend/stable_diffusion/extension_callback_type.py
@@ -0,0 +1,12 @@
+from enum import Enum
+
+
+class ExtensionCallbackType(Enum):
+    SETUP = "setup"
+    PRE_DENOISE_LOOP = "pre_denoise_loop"
+    POST_DENOISE_LOOP = "post_denoise_loop"
+    PRE_STEP = "pre_step"
+    POST_STEP = "post_step"
+    PRE_UNET = "pre_unet"
+    POST_UNET = "post_unet"
+    POST_COMBINE_NOISE_PREDS = "post_combine_noise_preds"
--- a/invokeai/backend/stable_diffusion/extensions/base.py
+++ b/invokeai/backend/stable_diffusion/extensions/base.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional
+
+import torch
+from diffusers import UNet2DConditionModel
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+    from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+
+
+@dataclass
+class CallbackMetadata:
+    callback_type: ExtensionCallbackType
+    order: int
+
+
+@dataclass
+class CallbackFunctionWithMetadata:
+    metadata: CallbackMetadata
+    function: Callable[[DenoiseContext], None]
+
+
+def callback(callback_type: ExtensionCallbackType, order: int = 0):
+    def _decorator(function):
+        function._ext_metadata = CallbackMetadata(
+            callback_type=callback_type,
+            order=order,
+        )
+        return function
+
+    return _decorator
+
+
+class ExtensionBase:
+    def __init__(self):
+        self._callbacks: Dict[ExtensionCallbackType, List[CallbackFunctionWithMetadata]] = {}
+
+        # Register all of the callback methods for this instance.
+        for func_name in dir(self):
+            func = getattr(self, func_name)
+            metadata = getattr(func, "_ext_metadata", None)
+            if metadata is not None and isinstance(metadata, CallbackMetadata):
+                if metadata.callback_type not in self._callbacks:
+                    self._callbacks[metadata.callback_type] = []
+                self._callbacks[metadata.callback_type].append(CallbackFunctionWithMetadata(metadata, func))
+
+    def get_callbacks(self):
+        return self._callbacks
+
+    @contextmanager
+    def patch_extension(self, ctx: DenoiseContext):
+        yield None
+
+    @contextmanager
+    def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
+        yield None
--- a/invokeai/backend/stable_diffusion/extensions/controlnet.py
+++ b/invokeai/backend/stable_diffusion/extensions/controlnet.py
@@ -0,0 +1,158 @@
+from __future__ import annotations
+
+import math
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, List, Optional, Union
+
+import torch
+from PIL.Image import Image
+
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
+from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, prepare_control_image
+from invokeai.backend.stable_diffusion.denoise_context import UNetKwargs
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningMode
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+    from invokeai.backend.util.hotfixes import ControlNetModel
+
+
+class ControlNetExt(ExtensionBase):
+    def __init__(
+        self,
+        model: ControlNetModel,
+        image: Image,
+        weight: Union[float, List[float]],
+        begin_step_percent: float,
+        end_step_percent: float,
+        control_mode: CONTROLNET_MODE_VALUES,
+        resize_mode: CONTROLNET_RESIZE_VALUES,
+    ):
+        super().__init__()
+        self._model = model
+        self._image = image
+        self._weight = weight
+        self._begin_step_percent = begin_step_percent
+        self._end_step_percent = end_step_percent
+        self._control_mode = control_mode
+        self._resize_mode = resize_mode
+
+        self._image_tensor: Optional[torch.Tensor] = None
+
+    @contextmanager
+    def patch_extension(self, ctx: DenoiseContext):
+        original_processors = self._model.attn_processors
+        try:
+            self._model.set_attn_processor(ctx.inputs.attention_processor_cls())
+
+            yield None
+        finally:
+            self._model.set_attn_processor(original_processors)
+
+    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
+    def resize_image(self, ctx: DenoiseContext):
+        _, _, latent_height, latent_width = ctx.latents.shape
+        image_height = latent_height * LATENT_SCALE_FACTOR
+        image_width = latent_width * LATENT_SCALE_FACTOR
+
+        self._image_tensor = prepare_control_image(
+            image=self._image,
+            do_classifier_free_guidance=False,
+            width=image_width,
+            height=image_height,
+            device=ctx.latents.device,
+            dtype=ctx.latents.dtype,
+            control_mode=self._control_mode,
+            resize_mode=self._resize_mode,
+        )
+
+    @callback(ExtensionCallbackType.PRE_UNET)
+    def pre_unet_step(self, ctx: DenoiseContext):
+        # skip if model not active in current step
+        total_steps = len(ctx.inputs.timesteps)
+        first_step = math.floor(self._begin_step_percent * total_steps)
+        last_step = math.ceil(self._end_step_percent * total_steps)
+        if ctx.step_index < first_step or ctx.step_index > last_step:
+            return
+
+        # convert mode to internal flags
+        soft_injection = self._control_mode in ["more_prompt", "more_control"]
+        cfg_injection = self._control_mode in ["more_control", "unbalanced"]
+
+        # no negative conditioning in cfg_injection mode
+        if cfg_injection:
+            if ctx.conditioning_mode == ConditioningMode.Negative:
+                return
+            down_samples, mid_sample = self._run(ctx, soft_injection, ConditioningMode.Positive)
+
+            if ctx.conditioning_mode == ConditioningMode.Both:
+                # add zeros as samples for negative conditioning
+                down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples]
+                mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample])
+
+        else:
+            down_samples, mid_sample = self._run(ctx, soft_injection, ctx.conditioning_mode)
+
+        if (
+            ctx.unet_kwargs.down_block_additional_residuals is None
+            and ctx.unet_kwargs.mid_block_additional_residual is None
+        ):
+            ctx.unet_kwargs.down_block_additional_residuals = down_samples
+            ctx.unet_kwargs.mid_block_additional_residual = mid_sample
+        else:
+            # add controlnet outputs together if have multiple controlnets
+            ctx.unet_kwargs.down_block_additional_residuals = [
+                samples_prev + samples_curr
+                for samples_prev, samples_curr in zip(
+                    ctx.unet_kwargs.down_block_additional_residuals, down_samples, strict=True
+                )
+            ]
+            ctx.unet_kwargs.mid_block_additional_residual += mid_sample
+
+    def _run(self, ctx: DenoiseContext, soft_injection: bool, conditioning_mode: ConditioningMode):
+        """Run the ControlNet model for the current step and return its (down_samples, mid_sample) outputs."""
+        total_steps = len(ctx.inputs.timesteps)
+        model_input = ctx.latent_model_input
+        image_tensor = self._image_tensor
+        if conditioning_mode == ConditioningMode.Both:
+            model_input = torch.cat([model_input] * 2)
+            image_tensor = torch.cat([image_tensor] * 2)
+
+        cn_unet_kwargs = UNetKwargs(
+            sample=model_input,
+            timestep=ctx.timestep,
+            encoder_hidden_states=None,  # set later by conditioning
+            cross_attention_kwargs=dict(  # noqa: C408
+                percent_through=ctx.step_index / total_steps,
+            ),
+        )
+
+        ctx.inputs.conditioning_data.to_unet_kwargs(cn_unet_kwargs, conditioning_mode=conditioning_mode)
+
+        # get static weight, or weight corresponding to current step
+        weight = self._weight
+        if isinstance(weight, list):
+            weight = weight[ctx.step_index]
+
+        # Copy the kwargs so that dropping keys does not delete attributes from `cn_unet_kwargs` itself.
+        tmp_kwargs = dict(vars(cn_unet_kwargs))
+
+        # Remove kwargs not related to ControlNet unet: ControlNet guidance fields
+        del tmp_kwargs["down_block_additional_residuals"]
+        del tmp_kwargs["mid_block_additional_residual"]
+
+        # T2i Adapter guidance fields
+        del tmp_kwargs["down_intrablock_additional_residuals"]
+
+        # controlnet(s) inference
+        down_samples, mid_sample = self._model(
+            controlnet_cond=image_tensor,
+            conditioning_scale=weight,  # controlnet specific, NOT the guidance scale
+            guess_mode=soft_injection,  # this is still called guess_mode in diffusers ControlNetModel
+            return_dict=False,
+            **tmp_kwargs,
+        )
+
+        return down_samples, mid_sample
--- a/invokeai/backend/stable_diffusion/extensions/freeu.py
+++ b/invokeai/backend/stable_diffusion/extensions/freeu.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Dict, Optional
+
+import torch
+from diffusers import UNet2DConditionModel
+
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase
+
+if TYPE_CHECKING:
+    from invokeai.app.shared.models import FreeUConfig
+
+
+class FreeUExt(ExtensionBase):
+    def __init__(
+        self,
+        freeu_config: FreeUConfig,
+    ):
+        super().__init__()
+        self._freeu_config = freeu_config
+
+    @contextmanager
+    def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
+        unet.enable_freeu(
+            b1=self._freeu_config.b1,
+            b2=self._freeu_config.b2,
+            s1=self._freeu_config.s1,
+            s2=self._freeu_config.s2,
+        )
+
+        try:
+            yield
+        finally:
+            unet.disable_freeu()
--- a/invokeai/backend/stable_diffusion/extensions/inpaint.py
+++ b/invokeai/backend/stable_diffusion/extensions/inpaint.py
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import einops
+import torch
+from diffusers import UNet2DConditionModel
+
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class InpaintExt(ExtensionBase):
+    """An extension for inpainting with non-inpainting models. See `InpaintModelExt` for inpainting with inpainting
+    models.
+    """
+
+    def __init__(
+        self,
+        mask: torch.Tensor,
+        is_gradient_mask: bool,
+    ):
+        """Initialize InpaintExt.
+        Args:
+            mask (torch.Tensor): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
+                expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
+                inpainted.
+            is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
+                from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
+                1.
+        """
+        super().__init__()
+        self._mask = mask
+        self._is_gradient_mask = is_gradient_mask
+
+        # Noise, which used to noisify unmasked part of image
+        # if noise provided to context, then it will be used
+        # if no noise provided, then noise will be generated based on seed
+        self._noise: Optional[torch.Tensor] = None
+
+    @staticmethod
+    def _is_normal_model(unet: UNet2DConditionModel):
+        """Checks if the provided UNet belongs to a regular model.
+        The `in_channels` of a UNet vary depending on model type:
+        - normal - 4
+        - depth - 5
+        - inpaint - 9
+        """
+        return unet.conv_in.in_channels == 4
+
+    def _apply_mask(self, ctx: DenoiseContext, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
+        batch_size = latents.size(0)
+        mask = einops.repeat(self._mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
+        if t.dim() == 0:
+            # some schedulers expect t to be one-dimensional.
+            # TODO: file diffusers bug about inconsistency?
+            t = einops.repeat(t, "-> batch", batch=batch_size)
+        # Noise shouldn't be re-randomized between steps here. The multistep schedulers
+        # get very confused about what is happening from step to step when we do that.
+        mask_latents = ctx.scheduler.add_noise(ctx.inputs.orig_latents, self._noise, t)
+        # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
+        # mask_latents = self.scheduler.scale_model_input(mask_latents, t)
+        mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
+        if self._is_gradient_mask:
+            threshold = (t.item()) / ctx.scheduler.config.num_train_timesteps
+            mask_bool = mask < 1 - threshold
+            masked_input = torch.where(mask_bool, latents, mask_latents)
+        else:
+            masked_input = torch.lerp(latents, mask_latents.to(dtype=latents.dtype), mask.to(dtype=latents.dtype))
+        return masked_input
+
+    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
+    def init_tensors(self, ctx: DenoiseContext):
+        if not self._is_normal_model(ctx.unet):
+            raise ValueError(
+                "InpaintExt should be used only on normal (non-inpainting) models. This could be caused by an "
+                "inpainting model that was incorrectly marked as a non-inpainting model. In some cases, this can be "
+                "fixed by removing and re-adding the model (so that it gets re-probed)."
+            )
+
+        self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+        self._noise = ctx.inputs.noise
+        # 'noise' might be None if the latents have already been noised (e.g. when running the SDXL refiner).
+        # We still need noise for inpainting, so we generate it from the seed here.
+        if self._noise is None:
+            self._noise = torch.randn(
+                ctx.latents.shape,
+                dtype=torch.float32,
+                device="cpu",
+                generator=torch.Generator(device="cpu").manual_seed(ctx.seed),
+            ).to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+    # Use negative order to make extensions with default order work with patched latents
+    @callback(ExtensionCallbackType.PRE_STEP, order=-100)
+    def apply_mask_to_initial_latents(self, ctx: DenoiseContext):
+        ctx.latents = self._apply_mask(ctx, ctx.latents, ctx.timestep)
+
+    # TODO: redo this with preview events rewrite
+    # Use negative order to make extensions with default order work with patched latents
+    @callback(ExtensionCallbackType.POST_STEP, order=-100)
+    def apply_mask_to_step_output(self, ctx: DenoiseContext):
+        timestep = ctx.scheduler.timesteps[-1]
+        if hasattr(ctx.step_output, "denoised"):
+            ctx.step_output.denoised = self._apply_mask(ctx, ctx.step_output.denoised, timestep)
+        elif hasattr(ctx.step_output, "pred_original_sample"):
+            ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.pred_original_sample, timestep)
+        else:
+            ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.prev_sample, timestep)
+
+    # Restore unmasked part after the last step is completed
+    @callback(ExtensionCallbackType.POST_DENOISE_LOOP)
+    def restore_unmasked(self, ctx: DenoiseContext):
+        if self._is_gradient_mask:
+            ctx.latents = torch.where(self._mask < 1, ctx.latents, ctx.inputs.orig_latents)
+        else:
+            ctx.latents = torch.lerp(ctx.latents, ctx.inputs.orig_latents, self._mask)
--- a/invokeai/backend/stable_diffusion/extensions/inpaint_model.py
+++ b/invokeai/backend/stable_diffusion/extensions/inpaint_model.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import torch
+from diffusers import UNet2DConditionModel
+
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class InpaintModelExt(ExtensionBase):
+    """An extension for inpainting with inpainting models. See `InpaintExt` for inpainting with non-inpainting
+    models.
+    """
+
+    def __init__(
+        self,
+        mask: Optional[torch.Tensor],
+        masked_latents: Optional[torch.Tensor],
+        is_gradient_mask: bool,
+    ):
+        """Initialize InpaintModelExt.
+        Args:
+            mask (Optional[torch.Tensor]): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
+                expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
+                inpainted.
+            masked_latents (Optional[torch.Tensor]): Latents of the initial image with the area to be inpainted blacked
+                out. Required whenever mask is provided. Shape: (1, latent_channels, latent_height, latent_width).
+            is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
+                from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
+                1.
+        """
+        super().__init__()
+        if mask is not None and masked_latents is None:
+            raise ValueError("Source image required for inpaint mask when inpaint model used!")
+
+        # Invert the mask, because inpaint models treat the mask as: 0 - remain the same, 1 - inpaint
+        self._mask = None
+        if mask is not None:
+            self._mask = 1 - mask
+        self._masked_latents = masked_latents
+        self._is_gradient_mask = is_gradient_mask
+
+    @staticmethod
+    def _is_inpaint_model(unet: UNet2DConditionModel):
+        """Checks if the provided UNet belongs to an inpainting model.
+        The `in_channels` of a UNet vary depending on model type:
+        - normal - 4
+        - depth - 5
+        - inpaint - 9
+        """
+        return unet.conv_in.in_channels == 9
+
+    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
+    def init_tensors(self, ctx: DenoiseContext):
+        if not self._is_inpaint_model(ctx.unet):
+            raise ValueError("InpaintModelExt should be used only on inpaint models!")
+
+        if self._mask is None:
+            self._mask = torch.ones_like(ctx.latents[:1, :1])
+        self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+        if self._masked_latents is None:
+            self._masked_latents = torch.zeros_like(ctx.latents[:1])
+        self._masked_latents = self._masked_latents.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+    # Do last so that other extensions work with normal latents
+    @callback(ExtensionCallbackType.PRE_UNET, order=1000)
+    def append_inpaint_layers(self, ctx: DenoiseContext):
+        batch_size = ctx.unet_kwargs.sample.shape[0]
+        b_mask = torch.cat([self._mask] * batch_size)
+        b_masked_latents = torch.cat([self._masked_latents] * batch_size)
+        ctx.unet_kwargs.sample = torch.cat(
+            [ctx.unet_kwargs.sample, b_mask, b_masked_latents],
+            dim=1,
+        )
+
+    # Restore unmasked part as inpaint model can change unmasked part slightly
+    @callback(ExtensionCallbackType.POST_DENOISE_LOOP)
+    def restore_unmasked(self, ctx: DenoiseContext):
+        if self._is_gradient_mask:
+            ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)
+        else:
+            ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)
--- a/invokeai/backend/stable_diffusion/extensions/preview.py
+++ b/invokeai/backend/stable_diffusion/extensions/preview.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Callable, Optional
+
+import torch
+
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+# TODO: change event to accept image instead of latents
+@dataclass
+class PipelineIntermediateState:
+    step: int
+    order: int
+    total_steps: int
+    timestep: int
+    latents: torch.Tensor
+    predicted_original: Optional[torch.Tensor] = None
+
+
+class PreviewExt(ExtensionBase):
+    def __init__(self, callback: Callable[[PipelineIntermediateState], None]):
+        super().__init__()
+        self.callback = callback
+
+    # Do last so that all other changes are shown
+    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP, order=1000)
+    def initial_preview(self, ctx: DenoiseContext):
+        self.callback(
+            PipelineIntermediateState(
+                step=-1,
+                order=ctx.scheduler.order,
+                total_steps=len(ctx.inputs.timesteps),
+                timestep=int(ctx.scheduler.config.num_train_timesteps),  # TODO: is there any code which uses it?
+                latents=ctx.latents,
+            )
+        )
+
+    # Do last so that all other changes are shown
+    @callback(ExtensionCallbackType.POST_STEP, order=1000)
+    def step_preview(self, ctx: DenoiseContext):
+        if hasattr(ctx.step_output, "denoised"):
+            predicted_original = ctx.step_output.denoised
+        elif hasattr(ctx.step_output, "pred_original_sample"):
+            predicted_original = ctx.step_output.pred_original_sample
+        else:
+            predicted_original = ctx.step_output.prev_sample
+
+        self.callback(
+            PipelineIntermediateState(
+                step=ctx.step_index,
+                order=ctx.scheduler.order,
+                total_steps=len(ctx.inputs.timesteps),
+                timestep=int(ctx.timestep),  # TODO: is there any code which uses it?
+                latents=ctx.step_output.prev_sample,
+                predicted_original=predicted_original,  # TODO: is there any reason for additional field?
+            )
+        )
--- a/invokeai/backend/stable_diffusion/extensions/rescale_cfg.py
+++ b/invokeai/backend/stable_diffusion/extensions/rescale_cfg.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import torch
+
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class RescaleCFGExt(ExtensionBase):
+    def __init__(self, rescale_multiplier: float):
+        super().__init__()
+        self._rescale_multiplier = rescale_multiplier
+
+    @staticmethod
+    def _rescale_cfg(total_noise_pred: torch.Tensor, pos_noise_pred: torch.Tensor, multiplier: float = 0.7):
+        """Implementation of Algorithm 2 from https://arxiv.org/pdf/2305.08891.pdf."""
+        ro_pos = torch.std(pos_noise_pred, dim=(1, 2, 3), keepdim=True)
+        ro_cfg = torch.std(total_noise_pred, dim=(1, 2, 3), keepdim=True)
+
+        x_rescaled = total_noise_pred * (ro_pos / ro_cfg)
+        x_final = multiplier * x_rescaled + (1.0 - multiplier) * total_noise_pred
+        return x_final
+
+    @callback(ExtensionCallbackType.POST_COMBINE_NOISE_PREDS)
+    def rescale_noise_pred(self, ctx: DenoiseContext):
+        if self._rescale_multiplier > 0:
+            ctx.noise_pred = self._rescale_cfg(
+                ctx.noise_pred,
+                ctx.positive_noise_pred,
+                self._rescale_multiplier,
+            )
--- a/invokeai/backend/stable_diffusion/extensions/seamless.py
+++ b/invokeai/backend/stable_diffusion/extensions/seamless.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import Callable, Dict, List, Optional, Tuple
+
+import torch
+import torch.nn as nn
+from diffusers import UNet2DConditionModel
+from diffusers.models.lora import LoRACompatibleConv
+
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase
+
+
+class SeamlessExt(ExtensionBase):
+    def __init__(
+        self,
+        seamless_axes: List[str],
+    ):
+        super().__init__()
+        self._seamless_axes = seamless_axes
+
+    @contextmanager
+    def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
+        with self.static_patch_model(
+            model=unet,
+            seamless_axes=self._seamless_axes,
+        ):
+            yield
+
+    @staticmethod
+    @contextmanager
+    def static_patch_model(
+        model: torch.nn.Module,
+        seamless_axes: List[str],
+    ):
+        if not seamless_axes:
+            yield
+            return
+
+        x_mode = "circular" if "x" in seamless_axes else "constant"
+        y_mode = "circular" if "y" in seamless_axes else "constant"
+
+        # override conv_forward
+        # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
+        def _conv_forward_asymmetric(
+            self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None
+        ):
+            self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
+            self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
+            working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
+            working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
+            return torch.nn.functional.conv2d(
+                working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
+            )
+
+        original_layers: List[Tuple[nn.Conv2d, Callable]] = []
+        try:
+            for layer in model.modules():
+                if not isinstance(layer, torch.nn.Conv2d):
+                    continue
+
+                if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
+                    layer.lora_layer = lambda *x: 0
+                original_layers.append((layer, layer._conv_forward))
+                layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
+
+            yield
+
+        finally:
+            for layer, orig_conv_forward in original_layers:
+                layer._conv_forward = orig_conv_forward
--- a/invokeai/backend/stable_diffusion/extensions/t2i_adapter.py
+++ b/invokeai/backend/stable_diffusion/extensions/t2i_adapter.py
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+import math
+from typing import TYPE_CHECKING, List, Optional, Union
+
+import torch
+from diffusers import T2IAdapter
+from PIL.Image import Image
+
+from invokeai.app.util.controlnet_utils import prepare_control_image
+from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningMode
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.app.invocations.model import ModelIdentifierField
+    from invokeai.app.services.shared.invocation_context import InvocationContext
+    from invokeai.app.util.controlnet_utils import CONTROLNET_RESIZE_VALUES
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class T2IAdapterExt(ExtensionBase):
+    def __init__(
+        self,
+        node_context: InvocationContext,
+        model_id: ModelIdentifierField,
+        image: Image,
+        weight: Union[float, List[float]],
+        begin_step_percent: float,
+        end_step_percent: float,
+        resize_mode: CONTROLNET_RESIZE_VALUES,
+    ):
+        super().__init__()
+        self._node_context = node_context
+        self._model_id = model_id
+        self._image = image
+        self._weight = weight
+        self._resize_mode = resize_mode
+        self._begin_step_percent = begin_step_percent
+        self._end_step_percent = end_step_percent
+
+        self._adapter_state: Optional[List[torch.Tensor]] = None
+
+        # The max_unet_downscale is the maximum amount that the UNet model downscales the latent image internally.
+        model_config = self._node_context.models.get_config(self._model_id.key)
+        if model_config.base == BaseModelType.StableDiffusion1:
+            self._max_unet_downscale = 8
+        elif model_config.base == BaseModelType.StableDiffusionXL:
+            self._max_unet_downscale = 4
+        else:
+            raise ValueError(f"Unexpected T2I-Adapter base model type: '{model_config.base}'.")
+
+    @callback(ExtensionCallbackType.SETUP)
+    def setup(self, ctx: DenoiseContext):
+        t2i_model: T2IAdapter
+        with self._node_context.models.load(self._model_id) as t2i_model:
+            _, _, latents_height, latents_width = ctx.inputs.orig_latents.shape
+
+            self._adapter_state = self._run_model(
+                model=t2i_model,
+                image=self._image,
+                latents_height=latents_height,
+                latents_width=latents_width,
+            )
+
+    def _run_model(
+        self,
+        model: T2IAdapter,
+        image: Image,
+        latents_height: int,
+        latents_width: int,
+    ):
+        # Resize the T2I-Adapter input image.
+        # We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
+        # result will match the latent image's dimensions after max_unet_downscale is applied.
+        input_height = latents_height // self._max_unet_downscale * model.total_downscale_factor
+        input_width = latents_width // self._max_unet_downscale * model.total_downscale_factor
+
+        # Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
+        # a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
+        # T2I-Adapter model.
+        #
+        # Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
+        # of the same requirements (e.g. preserving binary masks during resize).
+        t2i_image = prepare_control_image(
+            image=image,
+            do_classifier_free_guidance=False,
+            width=input_width,
+            height=input_height,
+            num_channels=model.config["in_channels"],
+            device=model.device,
+            dtype=model.dtype,
+            resize_mode=self._resize_mode,
+        )
+
+        return model(t2i_image)
+
+    @callback(ExtensionCallbackType.PRE_UNET)
+    def pre_unet_step(self, ctx: DenoiseContext):
+        # skip if model not active in current step
+        total_steps = len(ctx.inputs.timesteps)
+        first_step = math.floor(self._begin_step_percent * total_steps)
+        last_step = math.ceil(self._end_step_percent * total_steps)
+        if ctx.step_index < first_step or ctx.step_index > last_step:
+            return
+
+        weight = self._weight
+        if isinstance(weight, list):
+            weight = weight[ctx.step_index]
+
+        adapter_state = self._adapter_state
+        if ctx.conditioning_mode == ConditioningMode.Both:
+            adapter_state = [torch.cat([v] * 2) for v in adapter_state]
+
+        if ctx.unet_kwargs.down_intrablock_additional_residuals is None:
+            ctx.unet_kwargs.down_intrablock_additional_residuals = [v * weight for v in adapter_state]
+        else:
+            for i, value in enumerate(adapter_state):
+                ctx.unet_kwargs.down_intrablock_additional_residuals[i] += value * weight
--- a/invokeai/backend/stable_diffusion/extensions_manager.py
+++ b/invokeai/backend/stable_diffusion/extensions_manager.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+from contextlib import ExitStack, contextmanager
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional
+
+import torch
+from diffusers import UNet2DConditionModel
+
+from invokeai.app.services.session_processor.session_processor_common import CanceledException
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+    from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+    from invokeai.backend.stable_diffusion.extensions.base import CallbackFunctionWithMetadata, ExtensionBase
+
+
+class ExtensionsManager:
+    def __init__(self, is_canceled: Optional[Callable[[], bool]] = None):
+        self._is_canceled = is_canceled
+
+        # A list of extensions in the order that they were added to the ExtensionsManager.
+        self._extensions: List[ExtensionBase] = []
+        self._ordered_callbacks: Dict[ExtensionCallbackType, List[CallbackFunctionWithMetadata]] = {}
+
+    def add_extension(self, extension: ExtensionBase):
+        self._extensions.append(extension)
+        self._regenerate_ordered_callbacks()
+
+    def _regenerate_ordered_callbacks(self):
+        """Regenerates self._ordered_callbacks. Intended to be called each time a new extension is added."""
+        self._ordered_callbacks = {}
+
+        # Fill the ordered callbacks dictionary.
+        for extension in self._extensions:
+            for callback_type, callbacks in extension.get_callbacks().items():
+                if callback_type not in self._ordered_callbacks:
+                    self._ordered_callbacks[callback_type] = []
+                self._ordered_callbacks[callback_type].extend(callbacks)
+
+        # Sort each callback list.
+        for callback_type, callbacks in self._ordered_callbacks.items():
+            # Note that sorted() is stable, so if two callbacks have the same order, the order in which their extensions
+            # were added will be preserved.
+            self._ordered_callbacks[callback_type] = sorted(callbacks, key=lambda x: x.metadata.order)
+
+    def run_callback(self, callback_type: ExtensionCallbackType, ctx: DenoiseContext):
+        if self._is_canceled and self._is_canceled():
+            raise CanceledException
+
+        callbacks = self._ordered_callbacks.get(callback_type, [])
+        for cb in callbacks:
+            cb.function(ctx)
+
+    @contextmanager
+    def patch_extensions(self, ctx: DenoiseContext):
+        if self._is_canceled and self._is_canceled():
+            raise CanceledException
+
+        with ExitStack() as exit_stack:
+            for ext in self._extensions:
+                exit_stack.enter_context(ext.patch_extension(ctx))
+
+            yield None
+
+    @contextmanager
+    def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
+        if self._is_canceled and self._is_canceled():
+            raise CanceledException
+
+        # TODO: create weight patch logic in PR with extension which uses it
+        with ExitStack() as exit_stack:
+            for ext in self._extensions:
+                exit_stack.enter_context(ext.patch_unet(unet, cached_weights))
+
+            yield None
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -61,6 +61,7 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
            # full noise. Investigate the history of why this got commented out.
            # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
            latents = self.scheduler.add_noise(latents, noise, batched_init_timestep)
+            assert isinstance(latents, torch.Tensor)  # For static type checking.

        # TODO(ryand): Look into the implications of passing in latents here that are larger than they will be after
        # cropping into regions.
@@ -122,19 +123,42 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
                    control_data=region_conditioning.control_data,
                )

-                # Store the results from the region.
-                # If two tiles overlap by more than the target overlap amount, crop the left and top edges of the
-                # affected tiles to achieve the target overlap.
+                # Build a region_weight matrix that applies gradient blending to the edges of the region.
                region = region_conditioning.region
-                top_adjustment = max(0, region.overlap.top - target_overlap)
-                left_adjustment = max(0, region.overlap.left - target_overlap)
-                region_height_slice = slice(region.coords.top + top_adjustment, region.coords.bottom)
-                region_width_slice = slice(region.coords.left + left_adjustment, region.coords.right)
-                merged_latents[:, :, region_height_slice, region_width_slice] += step_output.prev_sample[
-                    :, :, top_adjustment:, left_adjustment:
-                ]
-                # For now, we treat every region as having the same weight.
-                merged_latents_weights[:, :, region_height_slice, region_width_slice] += 1.0
+                _, _, region_height, region_width = step_output.prev_sample.shape
+                region_weight = torch.ones(
+                    (1, 1, region_height, region_width),
+                    dtype=latents.dtype,
+                    device=latents.device,
+                )
+                if region.overlap.left > 0:
+                    left_grad = torch.linspace(
+                        0, 1, region.overlap.left, device=latents.device, dtype=latents.dtype
+                    ).view((1, 1, 1, -1))
+                    region_weight[:, :, :, : region.overlap.left] *= left_grad
+                if region.overlap.top > 0:
+                    top_grad = torch.linspace(
+                        0, 1, region.overlap.top, device=latents.device, dtype=latents.dtype
+                    ).view((1, 1, -1, 1))
+                    region_weight[:, :, : region.overlap.top, :] *= top_grad
+                if region.overlap.right > 0:
+                    right_grad = torch.linspace(
+                        1, 0, region.overlap.right, device=latents.device, dtype=latents.dtype
+                    ).view((1, 1, 1, -1))
+                    region_weight[:, :, :, -region.overlap.right :] *= right_grad
+                if region.overlap.bottom > 0:
+                    bottom_grad = torch.linspace(
+                        1, 0, region.overlap.bottom, device=latents.device, dtype=latents.dtype
+                    ).view((1, 1, -1, 1))
+                    region_weight[:, :, -region.overlap.bottom :, :] *= bottom_grad
+
+                # Update the merged results with the region results.
+                merged_latents[
+                    :, :, region.coords.top : region.coords.bottom, region.coords.left : region.coords.right
+                ] += step_output.prev_sample * region_weight
+                merged_latents_weights[
+                    :, :, region.coords.top : region.coords.bottom, region.coords.left : region.coords.right
+                ] += region_weight

                pred_orig_sample = getattr(step_output, "pred_original_sample", None)
                if pred_orig_sample is not None:
@@ -142,9 +166,9 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
                    # they all use the same scheduler.
                    if merged_pred_original is None:
                        merged_pred_original = torch.zeros_like(latents)
-                    merged_pred_original[:, :, region_height_slice, region_width_slice] += pred_orig_sample[
-                        :, :, top_adjustment:, left_adjustment:
-                    ]
+                    merged_pred_original[
+                        :, :, region.coords.top : region.coords.bottom, region.coords.left : region.coords.right
+                    ] += pred_orig_sample

            # Normalize the merged results.
            latents = torch.where(merged_latents_weights > 0, merged_latents / merged_latents_weights, merged_latents)
--- a/invokeai/backend/stable_diffusion/seamless.py
+++ b/invokeai/backend/stable_diffusion/seamless.py
@@ -1,51 +0,0 @@
-from contextlib import contextmanager
-from typing import Callable, List, Optional, Tuple, Union
-
-import torch
-import torch.nn as nn
-from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
-from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
-from diffusers.models.lora import LoRACompatibleConv
-from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
-
-
-@contextmanager
-def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL, AutoencoderTiny], seamless_axes: List[str]):
-    if not seamless_axes:
-        yield
-        return
-
-    # override conv_forward
-    # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
-    def _conv_forward_asymmetric(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
-        self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
-        self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
-        working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
-        working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
-        return torch.nn.functional.conv2d(
-            working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
-        )
-
-    original_layers: List[Tuple[nn.Conv2d, Callable]] = []
-
-    try:
-        x_mode = "circular" if "x" in seamless_axes else "constant"
-        y_mode = "circular" if "y" in seamless_axes else "constant"
-
-        conv_layers: List[torch.nn.Conv2d] = []
-
-        for module in model.modules():
-            if isinstance(module, torch.nn.Conv2d):
-                conv_layers.append(module)
-
-        for layer in conv_layers:
-            if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
-                layer.lora_layer = lambda *x: 0
-            original_layers.append((layer, layer._conv_forward))
-            layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
-
-        yield
-
-    finally:
-        for layer, orig_conv_forward in original_layers:
-            layer._conv_forward = orig_conv_forward
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -65,17 +65,14 @@ class TextualInversionModelRaw(RawModel):

        return result

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        if not torch.cuda.is_available():
            return
-        for emb in [self.embedding, self.embedding_2]:
-            if emb is not None:
-                emb.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        # Tensor.to() returns a new tensor instead of moving in place, so the result must be stored back.
+        if self.embedding is not None:
+            self.embedding = self.embedding.to(device=device, dtype=dtype)
+        if self.embedding_2 is not None:
+            self.embedding_2 = self.embedding_2.to(device=device, dtype=dtype)

    def calc_size(self) -> int:
        """Get the size of this model in bytes."""
--- a/invokeai/backend/util/devices.py
+++ b/invokeai/backend/util/devices.py
@@ -112,15 +112,3 @@ class TorchDevice:
    @classmethod
    def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
        return NAME_TO_PRECISION[precision_name]
-
-    @staticmethod
-    def get_non_blocking(to_device: torch.device) -> bool:
-        """Return the non_blocking flag to be used when moving a tensor to a given device.
-        MPS may have unexpected errors with non-blocking operations - we should not use non-blocking when moving _to_ MPS.
-        When moving _from_ MPS, we can use non-blocking operations.
-
-        See:
-        - https://github.com/pytorch/pytorch/issues/107455
-        - https://discuss.pytorch.org/t/should-we-set-non-blocking-to-true/38234/28
-        """
-        return False if to_device.type == "mps" else True
--- a/invokeai/frontend/web/package.json
+++ b/invokeai/frontend/web/package.json
@@ -155,5 +155,8 @@
    "vite-plugin-eslint": "^1.8.1",
    "vite-tsconfig-paths": "^4.3.2",
    "vitest": "^1.6.0"
+  },
+  "engines": {
+    "pnpm": "8"
  }
 }
--- a/invokeai/frontend/web/public/locales/ar.json
+++ b/invokeai/frontend/web/public/locales/ar.json
@@ -77,10 +77,6 @@
            "title": "استعادة الوجوه",
            "desc": "استعادة الصورة الحالية"
        },
-        "upscale": {
-            "title": "تحسين الحجم",
-            "desc": "تحسين حجم الصورة الحالية"
-        },
        "showInfo": {
            "title": "عرض المعلومات",
            "desc": "عرض معلومات البيانات الخاصة بالصورة الحالية"
@@ -255,8 +251,6 @@
        "type": "نوع",
        "strength": "قوة",
        "upscaling": "تصغير",
-        "upscale": "تصغير",
-        "upscaleImage": "تصغير الصورة",
        "scale": "مقياس",
        "imageFit": "ملائمة الصورة الأولية لحجم الخرج",
        "scaleBeforeProcessing": "تحجيم قبل المعالجة",
--- a/invokeai/frontend/web/public/locales/de.json
+++ b/invokeai/frontend/web/public/locales/de.json
@@ -187,10 +187,6 @@
            "title": "Gesicht restaurieren",
            "desc": "Das aktuelle Bild restaurieren"
        },
-        "upscale": {
-            "title": "Hochskalieren",
-            "desc": "Das aktuelle Bild hochskalieren"
-        },
        "showInfo": {
            "title": "Info anzeigen",
            "desc": "Metadaten des aktuellen Bildes anzeigen"
@@ -433,8 +429,6 @@
        "type": "Art",
        "strength": "Stärke",
        "upscaling": "Hochskalierung",
-        "upscale": "Hochskalieren (Shift + U)",
-        "upscaleImage": "Bild hochskalieren",
        "scale": "Maßstab",
        "imageFit": "Ausgangsbild an Ausgabegröße anpassen",
        "scaleBeforeProcessing": "Skalieren vor der Verarbeitung",
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -32,12 +32,14 @@
        "deleteBoardAndImages": "Delete Board and Images",
        "deleteBoardOnly": "Delete Board Only",
        "deletedBoardsCannotbeRestored": "Deleted boards cannot be restored",
+        "hideBoards": "Hide Boards",
        "loading": "Loading...",
        "menuItemAutoAdd": "Auto-add to this Board",
        "move": "Move",
        "movingImagesToBoard_one": "Moving {{count}} image to board:",
        "movingImagesToBoard_other": "Moving {{count}} images to board:",
        "myBoard": "My Board",
+        "noBoards": "No {{boardType}} Boards",
        "noMatching": "No matching Boards",
        "private": "Private Boards",
        "searchBoard": "Search Boards...",
@@ -46,6 +48,7 @@
        "topMessage": "This board contains images used in the following features:",
        "unarchiveBoard": "Unarchive Board",
        "uncategorized": "Uncategorized",
+        "viewBoards": "View Boards",
        "downloadBoard": "Download Board",
        "imagesWithCount_one": "{{count}} image",
        "imagesWithCount_other": "{{count}} images",
@@ -102,6 +105,7 @@
        "negativePrompt": "Negative Prompt",
        "discordLabel": "Discord",
        "dontAskMeAgain": "Don't ask me again",
+        "dontShowMeThese": "Don't show me these",
        "editor": "Editor",
        "error": "Error",
        "file": "File",
@@ -373,10 +377,14 @@
        "displayBoardSearch": "Display Board Search",
        "displaySearch": "Display Search",
        "download": "Download",
+        "exitBoardSearch": "Exit Board Search",
+        "exitSearch": "Exit Search",
        "featuresWillReset": "If you delete this image, those features will immediately be reset.",
        "galleryImageSize": "Image Size",
        "gallerySettings": "Gallery Settings",
+        "go": "Go",
        "image": "image",
+        "jump": "Jump",
        "loading": "Loading",
        "loadMore": "Load More",
        "newestFirst": "Newest First",
@@ -636,9 +644,9 @@
            "title": "Undo Stroke"
        },
        "unifiedCanvasHotkeys": "Unified Canvas",
-        "upscale": {
-            "desc": "Upscale the current image",
-            "title": "Upscale"
+        "postProcess": {
+            "desc": "Process the current image using the selected post-processing model",
+            "title": "Process Image"
        },
        "toggleViewer": {
            "desc": "Switches between the Image Viewer and workspace for the current tab.",
@@ -1027,6 +1035,7 @@
        "imageActions": "Image Actions",
        "sendToImg2Img": "Send to Image to Image",
        "sendToUnifiedCanvas": "Send To Unified Canvas",
+        "sendToUpscale": "Send To Upscale",
        "showOptionsPanel": "Show Side Panel (O or T)",
        "shuffle": "Shuffle Seed",
        "steps": "Steps",
@@ -1034,8 +1043,8 @@
        "symmetry": "Symmetry",
        "tileSize": "Tile Size",
        "type": "Type",
-        "upscale": "Upscale (Shift + U)",
-        "upscaleImage": "Upscale Image",
+        "postProcessing": "Post-Processing (Shift + U)",
+        "processImage": "Process Image",
        "upscaling": "Upscaling",
        "useAll": "Use All",
        "useSize": "Use Size",
@@ -1091,6 +1100,8 @@
        "displayInProgress": "Display Progress Images",
        "enableImageDebugging": "Enable Image Debugging",
        "enableInformationalPopovers": "Enable Informational Popovers",
+        "informationalPopoversDisabled": "Informational Popovers Disabled",
+        "informationalPopoversDisabledDesc": "Informational popovers have been disabled. Enable them in Settings.",
        "enableInvisibleWatermark": "Enable Invisible Watermark",
        "enableNSFWChecker": "Enable NSFW Checker",
        "general": "General",
@@ -1498,6 +1509,30 @@
        "seamlessTilingYAxis": {
            "heading": "Seamless Tiling Y Axis",
            "paragraphs": ["Seamlessly tile an image along the vertical axis."]
+        },
+        "upscaleModel": {
+            "heading": "Upscale Model",
+            "paragraphs": [
+                "The upscale model scales the image to the output size before details are added. Any supported upscale model may be used, but some are specialized for different kinds of images, like photos or line drawings."
+            ]
+        },
+        "scale": {
+            "heading": "Scale",
+            "paragraphs": [
+                "Scale controls the output image size, and is based on a multiple of the input image resolution. For example, a 2x upscale on a 1024x1024 image would produce a 2048x2048 output."
+            ]
+        },
+        "creativity": {
+            "heading": "Creativity",
+            "paragraphs": [
+                "Creativity controls the amount of freedom granted to the model when adding details. Low creativity stays close to the original image, while high creativity allows for more change. When using a prompt, high creativity increases the influence of the prompt."
+            ]
+        },
+        "structure": {
+            "heading": "Structure",
+            "paragraphs": [
+                "Structure controls how closely the output image will keep to the layout of the original. Low structure allows major changes, while high structure strictly maintains the original composition and layout."
+            ]
        }
    },
    "unifiedCanvas": {
@@ -1640,6 +1675,27 @@
        "layers_one": "Layer",
        "layers_other": "Layers"
    },
+    "upscaling": {
+        "creativity": "Creativity",
+        "structure": "Structure",
+        "upscaleModel": "Upscale Model",
+        "postProcessingModel": "Post-Processing Model",
+        "scale": "Scale",
+        "postProcessingMissingModelWarning": "Visit the <LinkComponent>Model Manager</LinkComponent> to install a post-processing (image to image) model.",
+        "missingModelsWarning": "Visit the <LinkComponent>Model Manager</LinkComponent> to install the required models:",
+        "mainModelDesc": "Main model (SD1.5 or SDXL architecture)",
+        "tileControlNetModelDesc": "Tile ControlNet model for the chosen main model architecture",
+        "upscaleModelDesc": "Upscale (image to image) model",
+        "missingUpscaleInitialImage": "Missing initial image for upscaling",
+        "missingUpscaleModel": "Missing upscale model",
+        "missingTileControlNetModel": "No valid tile ControlNet models installed"
+    },
+    "upsell": {
+        "inviteTeammates": "Invite Teammates",
+        "professional": "Professional",
+        "professionalUpsell": "Available in Invoke’s Professional Edition. Click here or visit invoke.com/pricing for more details.",
+        "shareAccess": "Share Access"
+    },
    "ui": {
        "tabs": {
            "generation": "Generation",
@@ -1651,7 +1707,9 @@
            "models": "Models",
            "modelsTab": "$t(ui.tabs.models) $t(common.tab)",
            "queue": "Queue",
-            "queueTab": "$t(ui.tabs.queue) $t(common.tab)"
+            "queueTab": "$t(ui.tabs.queue) $t(common.tab)",
+            "upscaling": "Upscaling",
+            "upscalingTab": "$t(ui.tabs.upscaling) $t(common.tab)"
        }
    }
 }
--- a/invokeai/frontend/web/public/locales/es.json
+++ b/invokeai/frontend/web/public/locales/es.json
@@ -151,10 +151,6 @@
            "title": "Restaurar rostros",
            "desc": "Restaurar rostros en la imagen actual"
        },
-        "upscale": {
-            "title": "Aumentar resolución",
-            "desc": "Aumentar la resolución de la imagen actual"
-        },
        "showInfo": {
            "title": "Mostrar información",
            "desc": "Mostar metadatos de la imagen actual"
@@ -360,8 +356,6 @@
        "type": "Tipo",
        "strength": "Fuerza",
        "upscaling": "Aumento de resolución",
-        "upscale": "Aumentar resolución",
-        "upscaleImage": "Aumentar la resolución de la imagen",
        "scale": "Escala",
        "imageFit": "Ajuste tamaño de imagen inicial al tamaño objetivo",
        "scaleBeforeProcessing": "Redimensionar antes de procesar",
@@ -408,7 +402,12 @@
        "showProgressInViewer": "Mostrar las imágenes del progreso en el visor",
        "ui": "Interfaz del usuario",
        "generation": "Generación",
-        "beta": "Beta"
+        "beta": "Beta",
+        "reloadingIn": "Recargando en",
+        "intermediatesClearedFailed": "Error limpiando los intermediarios",
+        "intermediatesCleared_one": "Borrado {{count}} intermediario",
+        "intermediatesCleared_many": "Borrados {{count}} intermediarios",
+        "intermediatesCleared_other": "Borrados {{count}} intermediarios"
    },
    "toast": {
        "uploadFailed": "Error al subir archivo",
@@ -426,7 +425,12 @@
        "parameterSet": "Conjunto de parámetros",
        "parameterNotSet": "Parámetro no configurado",
        "problemCopyingImage": "No se puede copiar la imagen",
-        "errorCopied": "Error al copiar"
+        "errorCopied": "Error al copiar",
+        "baseModelChanged": "Modelo base cambiado",
+        "addedToBoard": "Añadido al tablero",
+        "baseModelChangedCleared_one": "Borrado o desactivado {{count}} submodelo incompatible",
+        "baseModelChangedCleared_many": "Borrados o desactivados {{count}} submodelos incompatibles",
+        "baseModelChangedCleared_other": "Borrados o desactivados {{count}} submodelos incompatibles"
    },
    "tooltip": {
        "feature": {
@@ -540,7 +544,13 @@
        "downloadBoard": "Descargar panel",
        "deleteBoardOnly": "Borrar solo el panel",
        "myBoard": "Mi panel",
-        "noMatching": "No hay paneles que coincidan"
+        "noMatching": "No hay paneles que coincidan",
+        "imagesWithCount_one": "{{count}} imagen",
+        "imagesWithCount_many": "{{count}} imágenes",
+        "imagesWithCount_other": "{{count}} imágenes",
+        "assetsWithCount_one": "{{count}} activo",
+        "assetsWithCount_many": "{{count}} activos",
+        "assetsWithCount_other": "{{count}} activos"
    },
    "accordions": {
        "compositing": {
@@ -590,6 +600,27 @@
        "balanced": "Equilibrado",
        "beginEndStepPercent": "Inicio / Final Porcentaje de pasos",
        "detectResolution": "Detectar resolución",
-        "beginEndStepPercentShort": "Inicio / Final %"
+        "beginEndStepPercentShort": "Inicio / Final %",
+        "t2i_adapter": "$t(controlnet.controlAdapter_one) #{{number}} ($t(common.t2iAdapter))",
+        "controlnet": "$t(controlnet.controlAdapter_one) #{{number}} ($t(common.controlNet))",
+        "ip_adapter": "$t(controlnet.controlAdapter_one) #{{number}} ($t(common.ipAdapter))",
+        "addControlNet": "Añadir $t(common.controlNet)",
+        "addIPAdapter": "Añadir $t(common.ipAdapter)",
+        "controlAdapter_one": "Adaptador de control",
+        "controlAdapter_many": "Adaptadores de control",
+        "controlAdapter_other": "Adaptadores de control",
+        "addT2IAdapter": "Añadir $t(common.t2iAdapter)"
+    },
+    "queue": {
+        "back": "Atrás",
+        "front": "Delante",
+        "batchQueuedDesc_one": "Se agregó {{count}} sesión a {{direction}} la cola",
+        "batchQueuedDesc_many": "Se agregaron {{count}} sesiones a {{direction}} la cola",
+        "batchQueuedDesc_other": "Se agregaron {{count}} sesiones a {{direction}} la cola"
+    },
+    "upsell": {
+        "inviteTeammates": "Invitar compañeros de equipo",
+        "shareAccess": "Compartir acceso",
+        "professionalUpsell": "Disponible en la edición profesional de Invoke. Haz clic aquí o visita invoke.com/pricing para obtener más detalles."
    }
 }
--- a/invokeai/frontend/web/public/locales/fr.json
+++ b/invokeai/frontend/web/public/locales/fr.json
@@ -130,10 +130,6 @@
            "title": "Restaurer les visages",
            "desc": "Restaurer l'image actuelle"
        },
-        "upscale": {
-            "title": "Agrandir",
-            "desc": "Agrandir l'image actuelle"
-        },
        "showInfo": {
            "title": "Afficher les informations",
            "desc": "Afficher les informations de métadonnées de l'image actuelle"
@@ -308,8 +304,6 @@
        "type": "Type",
        "strength": "Force",
        "upscaling": "Agrandissement",
-        "upscale": "Agrandir",
-        "upscaleImage": "Image en Agrandissement",
        "scale": "Echelle",
        "imageFit": "Ajuster Image Initiale à la Taille de Sortie",
        "scaleBeforeProcessing": "Echelle Avant Traitement",
--- a/invokeai/frontend/web/public/locales/he.json
+++ b/invokeai/frontend/web/public/locales/he.json
@@ -90,10 +90,6 @@
            "desc": "שחזור התמונה הנוכחית",
            "title": "שחזור פרצופים"
        },
-        "upscale": {
-            "title": "הגדלת קנה מידה",
-            "desc": "הגדל את התמונה הנוכחית"
-        },
        "showInfo": {
            "title": "הצג מידע",
            "desc": "הצגת פרטי מטא-נתונים של התמונה הנוכחית"
@@ -263,8 +259,6 @@
        "seed": "זרע",
        "type": "סוג",
        "strength": "חוזק",
-        "upscale": "הגדלת קנה מידה",
-        "upscaleImage": "הגדלת קנה מידת התמונה",
        "denoisingStrength": "חוזק מנטרל הרעש",
        "scaleBeforeProcessing": "שנה קנה מידה לפני עיבוד",
        "scaledWidth": "קנה מידה לאחר שינוי W",
--- a/invokeai/frontend/web/public/locales/it.json
+++ b/invokeai/frontend/web/public/locales/it.json
@@ -150,7 +150,11 @@
        "showArchivedBoards": "Mostra le bacheche archiviate",
        "searchImages": "Ricerca per metadati",
        "displayBoardSearch": "Mostra la ricerca nelle Bacheche",
-        "displaySearch": "Mostra la ricerca"
+        "displaySearch": "Mostra la ricerca",
+        "selectAllOnPage": "Seleziona tutto nella pagina",
+        "selectAllOnBoard": "Seleziona tutto nella bacheca",
+        "exitBoardSearch": "Esci da Ricerca bacheca",
+        "exitSearch": "Esci dalla ricerca"
    },
    "hotkeys": {
        "keyboardShortcuts": "Tasti di scelta rapida",
@@ -210,10 +214,6 @@
            "title": "Restaura volti",
            "desc": "Restaura l'immagine corrente"
        },
-        "upscale": {
-            "title": "Amplia",
-            "desc": "Amplia l'immagine corrente"
-        },
        "showInfo": {
            "title": "Mostra informazioni",
            "desc": "Mostra le informazioni sui metadati dell'immagine corrente"
@@ -377,6 +377,10 @@
        "toggleViewer": {
            "title": "Attiva/disattiva il visualizzatore di immagini",
            "desc": "Passa dal visualizzatore immagini all'area di lavoro per la scheda corrente."
+        },
+        "postProcess": {
+            "desc": "Elabora l'immagine corrente utilizzando il modello di post-elaborazione selezionato",
+            "title": "Elabora immagine"
        }
    },
    "modelManager": {
@@ -505,8 +509,6 @@
        "type": "Tipo",
        "strength": "Forza",
        "upscaling": "Ampliamento",
-        "upscale": "Amplia (Shift + U)",
-        "upscaleImage": "Amplia Immagine",
        "scale": "Scala",
        "imageFit": "Adatta l'immagine iniziale alle dimensioni di output",
        "scaleBeforeProcessing": "Scala prima dell'elaborazione",
@@ -591,7 +593,10 @@
        "infillColorValue": "Colore di riempimento",
        "globalSettings": "Impostazioni globali",
        "globalPositivePromptPlaceholder": "Prompt positivo globale",
-        "globalNegativePromptPlaceholder": "Prompt negativo globale"
+        "globalNegativePromptPlaceholder": "Prompt negativo globale",
+        "processImage": "Elabora Immagine",
+        "sendToUpscale": "Invia a Ampliare",
+        "postProcessing": "Post-elaborazione (Shift + U)"
    },
    "settings": {
        "models": "Modelli",
@@ -964,7 +969,10 @@
        "boards": "Bacheche",
        "private": "Bacheche private",
        "shared": "Bacheche condivise",
-        "addPrivateBoard": "Aggiungi una Bacheca Privata"
+        "addPrivateBoard": "Aggiungi una Bacheca Privata",
+        "noBoards": "Nessuna bacheca {{boardType}}",
+        "hideBoards": "Nascondi bacheche",
+        "viewBoards": "Visualizza bacheche"
    },
    "controlnet": {
        "contentShuffleDescription": "Rimescola il contenuto di un'immagine",
@@ -1684,7 +1692,30 @@
            "models": "Modelli",
            "modelsTab": "$t(ui.tabs.models) $t(common.tab)",
            "queue": "Coda",
-            "queueTab": "$t(ui.tabs.queue) $t(common.tab)"
+            "queueTab": "$t(ui.tabs.queue) $t(common.tab)",
+            "upscaling": "Ampliamento",
+            "upscalingTab": "$t(ui.tabs.upscaling) $t(common.tab)"
        }
+    },
+    "upscaling": {
+        "creativity": "Creatività",
+        "structure": "Struttura",
+        "upscaleModel": "Modello di Ampliamento",
+        "scale": "Scala",
+        "missingModelsWarning": "Visita <LinkComponent>Gestione modelli</LinkComponent> per installare i modelli richiesti:",
+        "mainModelDesc": "Modello principale (architettura SD1.5 o SDXL)",
+        "tileControlNetModelDesc": "Modello Tile ControlNet per l'architettura del modello principale scelto",
+        "upscaleModelDesc": "Modello per l'ampliamento (da immagine a immagine)",
+        "missingUpscaleInitialImage": "Immagine iniziale mancante per l'ampliamento",
+        "missingUpscaleModel": "Modello per l’ampliamento mancante",
+        "missingTileControlNetModel": "Nessun modello ControlNet Tile valido installato",
+        "postProcessingModel": "Modello di post-elaborazione",
+        "postProcessingMissingModelWarning": "Visita <LinkComponent>Gestione modelli</LinkComponent> per installare un modello di post-elaborazione (da immagine a immagine)."
+    },
+    "upsell": {
+        "inviteTeammates": "Invita collaboratori",
+        "shareAccess": "Condividi l'accesso",
+        "professional": "Professionale",
+        "professionalUpsell": "Disponibile nell'edizione Professional di Invoke. Fai clic qui o visita invoke.com/pricing per ulteriori dettagli."
    }
 }
--- a/invokeai/frontend/web/public/locales/ja.json
+++ b/invokeai/frontend/web/public/locales/ja.json
@@ -199,10 +199,6 @@
            "title": "顔の修復",
            "desc": "現在の画像を修復"
        },
-        "upscale": {
-            "title": "アップスケール",
-            "desc": "現在の画像をアップスケール"
-        },
        "showInfo": {
            "title": "情報を見る",
            "desc": "現在の画像のメタデータ情報を表示"
@@ -427,8 +423,6 @@
        "shuffle": "シャッフル",
        "strength": "強度",
        "upscaling": "アップスケーリング",
-        "upscale": "アップスケール",
-        "upscaleImage": "画像をアップスケール",
        "scale": "Scale",
        "scaleBeforeProcessing": "処理前のスケール",
        "scaledWidth": "幅のスケール",
--- a/invokeai/frontend/web/public/locales/ko.json
+++ b/invokeai/frontend/web/public/locales/ko.json
@@ -258,10 +258,6 @@
            "desc": "캔버스 브러시를 선택",
            "title": "브러시 선택"
        },
-        "upscale": {
-            "desc": "현재 이미지를 업스케일",
-            "title": "업스케일"
-        },
        "previousImage": {
            "title": "이전 이미지",
            "desc": "갤러리에 이전 이미지 표시"
--- a/invokeai/frontend/web/public/locales/nl.json
+++ b/invokeai/frontend/web/public/locales/nl.json
@@ -168,10 +168,6 @@
            "title": "Herstel gezichten",
            "desc": "Herstelt de huidige afbeelding"
        },
-        "upscale": {
-            "title": "Schaal op",
-            "desc": "Schaalt de huidige afbeelding op"
-        },
        "showInfo": {
            "title": "Toon info",
            "desc": "Toont de metagegevens van de huidige afbeelding"
@@ -412,8 +408,6 @@
        "type": "Soort",
        "strength": "Sterkte",
        "upscaling": "Opschalen",
-        "upscale": "Vergroot (Shift + U)",
-        "upscaleImage": "Schaal afbeelding op",
        "scale": "Schaal",
        "imageFit": "Pas initiële afbeelding in uitvoergrootte",
        "scaleBeforeProcessing": "Schalen voor verwerking",
--- a/invokeai/frontend/web/public/locales/pl.json
+++ b/invokeai/frontend/web/public/locales/pl.json
@@ -78,10 +78,6 @@
            "title": "Popraw twarze",
            "desc": "Uruchamia proces poprawiania twarzy dla aktywnego obrazu"
        },
-        "upscale": {
-            "title": "Powiększ",
-            "desc": "Uruchamia proces powiększania aktywnego obrazu"
-        },
        "showInfo": {
            "title": "Pokaż informacje",
            "desc": "Pokazuje metadane zapisane w aktywnym obrazie"
@@ -232,8 +228,6 @@
        "type": "Metoda",
        "strength": "Siła",
        "upscaling": "Powiększanie",
-        "upscale": "Powiększ",
-        "upscaleImage": "Powiększ obraz",
        "scale": "Skala",
        "imageFit": "Przeskaluj oryginalny obraz",
        "scaleBeforeProcessing": "Tryb skalowania",
--- a/invokeai/frontend/web/public/locales/pt.json
+++ b/invokeai/frontend/web/public/locales/pt.json
@@ -160,10 +160,6 @@
            "title": "Restaurar Rostos",
            "desc": "Restaurar a imagem atual"
        },
-        "upscale": {
-            "title": "Redimensionar",
-            "desc": "Redimensionar a imagem atual"
-        },
        "showInfo": {
            "title": "Mostrar Informações",
            "desc": "Mostrar metadados de informações da imagem atual"
@@ -275,8 +271,6 @@
        "showOptionsPanel": "Mostrar Painel de Opções",
        "strength": "Força",
        "upscaling": "Redimensionando",
-        "upscale": "Redimensionar",
-        "upscaleImage": "Redimensionar Imagem",
        "scaleBeforeProcessing": "Escala Antes do Processamento",
        "images": "Imagems",
        "steps": "Passos",
--- a/invokeai/frontend/web/public/locales/pt_BR.json
+++ b/invokeai/frontend/web/public/locales/pt_BR.json
@@ -80,10 +80,6 @@
            "title": "Restaurar Rostos",
            "desc": "Restaurar a imagem atual"
        },
-        "upscale": {
-            "title": "Redimensionar",
-            "desc": "Redimensionar a imagem atual"
-        },
        "showInfo": {
            "title": "Mostrar Informações",
            "desc": "Mostrar metadados de informações da imagem atual"
@@ -268,8 +264,6 @@
        "type": "Tipo",
        "strength": "Força",
        "upscaling": "Redimensionando",
-        "upscale": "Redimensionar",
-        "upscaleImage": "Redimensionar Imagem",
        "scale": "Escala",
        "imageFit": "Caber Imagem Inicial No Tamanho de Saída",
        "scaleBeforeProcessing": "Escala Antes do Processamento",
--- a/invokeai/frontend/web/public/locales/ru.json
+++ b/invokeai/frontend/web/public/locales/ru.json
@@ -214,10 +214,6 @@
            "title": "Восстановить лица",
            "desc": "Восстановить лица на текущем изображении"
        },
-        "upscale": {
-            "title": "Увеличение",
-            "desc": "Увеличить текущеее изображение"
-        },
        "showInfo": {
            "title": "Показать метаданные",
            "desc": "Показать метаданные из текущего изображения"
@@ -512,8 +508,6 @@
        "type": "Тип",
        "strength": "Сила",
        "upscaling": "Увеличение",
-        "upscale": "Увеличить",
-        "upscaleImage": "Увеличить изображение",
        "scale": "Масштаб",
        "imageFit": "Уместить изображение",
        "scaleBeforeProcessing": "Масштабировать",
--- a/invokeai/frontend/web/public/locales/sv.json
+++ b/invokeai/frontend/web/public/locales/sv.json
@@ -90,10 +90,6 @@
            "title": "Återskapa ansikten",
            "desc": "Återskapa nuvarande bild"
        },
-        "upscale": {
-            "title": "Skala upp",
-            "desc": "Skala upp nuvarande bild"
-        },
        "showInfo": {
            "title": "Visa info",
            "desc": "Visa metadata för nuvarande bild"
--- a/invokeai/frontend/web/public/locales/tr.json
+++ b/invokeai/frontend/web/public/locales/tr.json
@@ -416,10 +416,6 @@
            "desc": "Maske/Taban katmanları arasında geçiş yapar",
            "title": "Katmanı Gizle-Göster"
        },
-        "upscale": {
-            "title": "Büyüt",
-            "desc": "Seçili görseli büyüt"
-        },
        "setSeed": {
            "title": "Tohumu Kullan",
            "desc": "Seçili görselin tohumunu kullan"
@@ -641,7 +637,6 @@
        "copyImage": "Görseli Kopyala",
        "height": "Boy",
        "width": "En",
-        "upscale": "Büyüt (Shift + U)",
        "useSize": "Boyutu Kullan",
        "symmetry": "Bakışım",
        "tileSize": "Döşeme Boyutu",
@@ -657,7 +652,6 @@
        "showOptionsPanel": "Yan Paneli Göster (O ya da T)",
        "shuffle": "Kar",
        "usePrompt": "İstemi Kullan",
-        "upscaleImage": "Görseli Büyüt",
        "setToOptimalSizeTooSmall": "$t(parameters.setToOptimalSize) (çok küçük olabilir)",
        "setToOptimalSizeTooLarge": "$t(parameters.setToOptimalSize) (çok büyük olabilir)",
        "cfgRescaleMultiplier": "CFG Rescale Çarpanı",
--- a/invokeai/frontend/web/public/locales/uk.json
+++ b/invokeai/frontend/web/public/locales/uk.json
@@ -85,10 +85,6 @@
            "title": "Відновити обличчя",
            "desc": "Відновити обличчя на поточному зображенні"
        },
-        "upscale": {
-            "title": "Збільшення",
-            "desc": "Збільшити поточне зображення"
-        },
        "showInfo": {
            "title": "Показати метадані",
            "desc": "Показати метадані з поточного зображення"
@@ -276,8 +272,6 @@
        "type": "Тип",
        "strength": "Сила",
        "upscaling": "Збільшення",
-        "upscale": "Збільшити",
-        "upscaleImage": "Збільшити зображення",
        "scale": "Масштаб",
        "imageFit": "Вмістити зображення",
        "scaleBeforeProcessing": "Масштабувати",
--- a/invokeai/frontend/web/public/locales/zh_CN.json
+++ b/invokeai/frontend/web/public/locales/zh_CN.json
@@ -193,10 +193,6 @@
            "title": "面部修复",
            "desc": "对当前图像进行面部修复"
        },
-        "upscale": {
-            "title": "放大",
-            "desc": "对当前图像进行放大"
-        },
        "showInfo": {
            "title": "显示信息",
            "desc": "显示当前图像的元数据"
@@ -422,8 +418,6 @@
        "type": "种类",
        "strength": "强度",
        "upscaling": "放大",
-        "upscale": "放大 (Shift + U)",
-        "upscaleImage": "放大图像",
        "scale": "等级",
        "imageFit": "使生成图像长宽适配初始图像",
        "scaleBeforeProcessing": "处理前缩放",
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/index.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/index.ts
@@ -1,5 +1,6 @@
 import type { TypedStartListening } from '@reduxjs/toolkit';
 import { createListenerMiddleware } from '@reduxjs/toolkit';
+import { addAdHocPostProcessingRequestedListener } from 'app/store/middleware/listenerMiddleware/listeners/addAdHocPostProcessingRequestedListener';
 import { addCommitStagingAreaImageListener } from 'app/store/middleware/listenerMiddleware/listeners/addCommitStagingAreaImageListener';
 import { addAnyEnqueuedListener } from 'app/store/middleware/listenerMiddleware/listeners/anyEnqueued';
 import { addAppConfigReceivedListener } from 'app/store/middleware/listenerMiddleware/listeners/appConfigReceived';
@@ -47,11 +48,11 @@ import { addModelLoadEventListener } from 'app/store/middleware/listenerMiddlewa
 import { addSocketQueueItemStatusChangedEventListener } from 'app/store/middleware/listenerMiddleware/listeners/socketio/socketQueueItemStatusChanged';
 import { addStagingAreaImageSavedListener } from 'app/store/middleware/listenerMiddleware/listeners/stagingAreaImageSaved';
 import { addUpdateAllNodesRequestedListener } from 'app/store/middleware/listenerMiddleware/listeners/updateAllNodesRequested';
-import { addUpscaleRequestedListener } from 'app/store/middleware/listenerMiddleware/listeners/upscaleRequested';
 import { addWorkflowLoadRequestedListener } from 'app/store/middleware/listenerMiddleware/listeners/workflowLoadRequested';
 import type { AppDispatch, RootState } from 'app/store/store';

 import { addArchivedOrDeletedBoardListener } from './listeners/addArchivedOrDeletedBoardListener';
+import { addEnqueueRequestedUpscale } from './listeners/enqueueRequestedUpscale';

 export const listenerMiddleware = createListenerMiddleware();

@@ -85,6 +86,7 @@ addGalleryOffsetChangedListener(startAppListening);
 addEnqueueRequestedCanvasListener(startAppListening);
 addEnqueueRequestedNodes(startAppListening);
 addEnqueueRequestedLinear(startAppListening);
+addEnqueueRequestedUpscale(startAppListening);
 addAnyEnqueuedListener(startAppListening);
 addBatchEnqueuedListener(startAppListening);

@@ -140,7 +142,7 @@ addModelsLoadedListener(startAppListening);
 addAppConfigReceivedListener(startAppListening);

 // Ad-hoc upscale workflwo
-addUpscaleRequestedListener(startAppListening);
+addAdHocPostProcessingRequestedListener(startAppListening);

 // Prompts
 addDynamicPromptsListener(startAppListening);
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/addAdHocPostProcessingRequestedListener.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/addAdHocPostProcessingRequestedListener.ts
@@ -2,46 +2,28 @@ import { createAction } from '@reduxjs/toolkit';
 import { logger } from 'app/logging/logger';
 import type { AppStartListening } from 'app/store/middleware/listenerMiddleware';
 import { parseify } from 'common/util/serialize';
-import { buildAdHocUpscaleGraph } from 'features/nodes/util/graph/buildAdHocUpscaleGraph';
-import { createIsAllowedToUpscaleSelector } from 'features/parameters/hooks/useIsAllowedToUpscale';
+import { buildAdHocPostProcessingGraph } from 'features/nodes/util/graph/buildAdHocPostProcessingGraph';
 import { toast } from 'features/toast/toast';
 import { t } from 'i18next';
 import { queueApi } from 'services/api/endpoints/queue';
 import type { BatchConfig, ImageDTO } from 'services/api/types';

-export const upscaleRequested = createAction<{ imageDTO: ImageDTO }>(`upscale/upscaleRequested`);
+export const adHocPostProcessingRequested = createAction<{ imageDTO: ImageDTO }>(`upscaling/postProcessingRequested`);

-export const addUpscaleRequestedListener = (startAppListening: AppStartListening) => {
+export const addAdHocPostProcessingRequestedListener = (startAppListening: AppStartListening) => {
  startAppListening({
-    actionCreator: upscaleRequested,
+    actionCreator: adHocPostProcessingRequested,
    effect: async (action, { dispatch, getState }) => {
      const log = logger('session');

      const { imageDTO } = action.payload;
-      const { image_name } = imageDTO;
      const state = getState();

-      const { isAllowedToUpscale, detailTKey } = createIsAllowedToUpscaleSelector(imageDTO)(state);
-
-      // if we can't upscale, show a toast and return
-      if (!isAllowedToUpscale) {
-        log.error(
-          { imageDTO },
-          t(detailTKey ?? 'parameters.isAllowedToUpscale.tooLarge') // should never coalesce
-        );
-        toast({
-          id: 'NOT_ALLOWED_TO_UPSCALE',
-          title: t(detailTKey ?? 'parameters.isAllowedToUpscale.tooLarge'), // should never coalesce
-          status: 'error',
-        });
-        return;
-      }
-
      const enqueueBatchArg: BatchConfig = {
        prepend: true,
        batch: {
-          graph: buildAdHocUpscaleGraph({
-            image_name,
+          graph: await buildAdHocPostProcessingGraph({
+            image: imageDTO,
            state,
          }),
          runs: 1,
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/addArchivedOrDeletedBoardListener.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/addArchivedOrDeletedBoardListener.ts
@@ -10,32 +10,32 @@ import {
 import { boardsApi } from 'services/api/endpoints/boards';
 import { imagesApi } from 'services/api/endpoints/images';

+// Type inference doesn't work for this if you inline it in the listener for some reason
+const matchAnyBoardDeleted = isAnyOf(
+  imagesApi.endpoints.deleteBoard.matchFulfilled,
+  imagesApi.endpoints.deleteBoardAndImages.matchFulfilled
+);
+
 export const addArchivedOrDeletedBoardListener = (startAppListening: AppStartListening) => {
  /**
   * The auto-add board shouldn't be set to an archived board or deleted board. When we archive a board, delete
   * a board, or change a the archived board visibility flag, we may need to reset the auto-add board.
   */
  startAppListening({
-    matcher: isAnyOf(
-      // If a board is deleted, we'll need to reset the auto-add board
-      imagesApi.endpoints.deleteBoard.matchFulfilled,
-      imagesApi.endpoints.deleteBoardAndImages.matchFulfilled
-    ),
+    matcher: matchAnyBoardDeleted,
    effect: async (action, { dispatch, getState }) => {
      const state = getState();
-      const queryArgs = selectListBoardsQueryArgs(state);
-      const queryResult = boardsApi.endpoints.listAllBoards.select(queryArgs)(state);
+      const deletedBoardId = action.meta.arg.originalArgs;
      const { autoAddBoardId, selectedBoardId } = state.gallery;

-      if (!queryResult.data) {
-        return;
-      }
-
-      if (!queryResult.data.find((board) => board.board_id === selectedBoardId)) {
+      // If the deleted board was currently selected, we should reset the selected board to uncategorized
+      if (deletedBoardId === selectedBoardId) {
        dispatch(boardIdSelected({ boardId: 'none' }));
        dispatch(galleryViewChanged('images'));
      }
-      if (!queryResult.data.find((board) => board.board_id === autoAddBoardId)) {
+
+      // If the deleted board was selected for auto-add, we should reset the auto-add board to uncategorized
+      if (deletedBoardId === autoAddBoardId) {
        dispatch(autoAddBoardIdChanged('none'));
      }
    },
@@ -46,14 +46,8 @@ export const addArchivedOrDeletedBoardListener = (startAppListening: AppStartLis
    matcher: boardsApi.endpoints.updateBoard.matchFulfilled,
    effect: async (action, { dispatch, getState }) => {
      const state = getState();
-      const queryArgs = selectListBoardsQueryArgs(state);
-      const queryResult = boardsApi.endpoints.listAllBoards.select(queryArgs)(state);
      const { shouldShowArchivedBoards } = state.gallery;

-      if (!queryResult.data) {
-        return;
-      }
-
      const wasArchived = action.meta.arg.originalArgs.changes.archived === true;

      if (wasArchived && !shouldShowArchivedBoards) {
@@ -71,7 +65,7 @@ export const addArchivedOrDeletedBoardListener = (startAppListening: AppStartLis
      const shouldShowArchivedBoards = action.payload;

      // We only need to take action if we have just hidden archived boards.
-      if (!shouldShowArchivedBoards) {
+      if (shouldShowArchivedBoards) {
        return;
      }

@@ -86,14 +80,16 @@ export const addArchivedOrDeletedBoardListener = (startAppListening: AppStartLis

      // Handle the case where selected board is archived
      const selectedBoard = queryResult.data.find((b) => b.board_id === selectedBoardId);
-      if (selectedBoard && selectedBoard.archived) {
+      if (!selectedBoard || selectedBoard.archived) {
+        // If we can't find the selected board or it's archived, we should reset the selected board to uncategorized
        dispatch(boardIdSelected({ boardId: 'none' }));
        dispatch(galleryViewChanged('images'));
      }

      // Handle the case where auto-add board is archived
      const autoAddBoard = queryResult.data.find((b) => b.board_id === autoAddBoardId);
-      if (autoAddBoard && autoAddBoard.archived) {
+      if (!autoAddBoard || autoAddBoard.archived) {
+        // If we can't find the auto-add board or it's archived, we should reset the auto-add board to uncategorized
        dispatch(autoAddBoardIdChanged('none'));
      }
    },
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/enqueueRequestedUpscale.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/enqueueRequestedUpscale.ts
@@ -0,0 +1,36 @@
+import { enqueueRequested } from 'app/store/actions';
+import type { AppStartListening } from 'app/store/middleware/listenerMiddleware';
+import { isImageViewerOpenChanged } from 'features/gallery/store/gallerySlice';
+import { prepareLinearUIBatch } from 'features/nodes/util/graph/buildLinearBatchConfig';
+import { buildMultidiffusionUpscaleGraph } from 'features/nodes/util/graph/buildMultidiffusionUpscaleGraph';
+import { queueApi } from 'services/api/endpoints/queue';
+
+export const addEnqueueRequestedUpscale = (startAppListening: AppStartListening) => {
+  startAppListening({
+    predicate: (action): action is ReturnType<typeof enqueueRequested> =>
+      enqueueRequested.match(action) && action.payload.tabName === 'upscaling',
+    effect: async (action, { getState, dispatch }) => {
+      const state = getState();
+      const { shouldShowProgressInViewer } = state.ui;
+      const { prepend } = action.payload;
+
+      const graph = await buildMultidiffusionUpscaleGraph(state);
+
+      const batchConfig = prepareLinearUIBatch(state, graph, prepend);
+
+      const req = dispatch(
+        queueApi.endpoints.enqueueBatch.initiate(batchConfig, {
+          fixedCacheKey: 'enqueueBatch',
+        })
+      );
+      try {
+        await req.unwrap();
+        if (shouldShowProgressInViewer) {
+          dispatch(isImageViewerOpenChanged(true));
+        }
+      } finally {
+        req.reset();
+      }
+    },
+  });
+};
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/imageDropped.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/imageDropped.ts
@@ -23,6 +23,7 @@ import {
 } from 'features/gallery/store/gallerySlice';
 import { fieldImageValueChanged } from 'features/nodes/store/nodesSlice';
 import { selectOptimalDimension } from 'features/parameters/store/generationSlice';
+import { upscaleInitialImageChanged } from 'features/parameters/store/upscaleSlice';
 import { imagesApi } from 'services/api/endpoints/images';

 export const dndDropped = createAction<{
@@ -243,6 +244,20 @@ export const addImageDroppedListener = (startAppListening: AppStartListening) =>
        return;
      }

+      /**
+       * Image dropped on upscale initial image
+       */
+      if (
+        overData.actionType === 'SET_UPSCALE_INITIAL_IMAGE' &&
+        activeData.payloadType === 'IMAGE_DTO' &&
+        activeData.payload.imageDTO
+      ) {
+        const { imageDTO } = activeData.payload;
+
+        dispatch(upscaleInitialImageChanged(imageDTO));
+        return;
+      }
+
      /**
       * Multiple images dropped on user board
       */
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/imageUploaded.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/imageUploaded.ts
@@ -14,6 +14,7 @@ import {
 import { selectListBoardsQueryArgs } from 'features/gallery/store/gallerySelectors';
 import { fieldImageValueChanged } from 'features/nodes/store/nodesSlice';
 import { selectOptimalDimension } from 'features/parameters/store/generationSlice';
+import { upscaleInitialImageChanged } from 'features/parameters/store/upscaleSlice';
 import { toast } from 'features/toast/toast';
 import { t } from 'i18next';
 import { omit } from 'lodash-es';
@@ -89,6 +90,15 @@ export const addImageUploadedFulfilledListener = (startAppListening: AppStartLis
        return;
      }

+      if (postUploadAction?.type === 'SET_UPSCALE_INITIAL_IMAGE') {
+        dispatch(upscaleInitialImageChanged(imageDTO));
+        toast({
+          ...DEFAULT_UPLOADED_TOAST,
+          description: 'set as upscale initial image',
+        });
+        return;
+      }
+
      if (postUploadAction?.type === 'SET_CONTROL_ADAPTER_IMAGE') {
        const { id } = postUploadAction;
        dispatch(
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelsLoaded.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelsLoaded.ts
@@ -10,6 +10,7 @@ import { heightChanged, widthChanged } from 'features/controlLayers/store/contro
 import { loraRemoved } from 'features/lora/store/loraSlice';
 import { calculateNewSize } from 'features/parameters/components/ImageSize/calculateNewSize';
 import { modelChanged, vaeSelected } from 'features/parameters/store/generationSlice';
+import { postProcessingModelChanged, upscaleModelChanged } from 'features/parameters/store/upscaleSlice';
 import { zParameterModel, zParameterVAEModel } from 'features/parameters/types/parameterSchemas';
 import { getIsSizeOptimal, getOptimalDimension } from 'features/parameters/util/optimalDimension';
 import { refinerModelChanged } from 'features/sdxl/store/sdxlSlice';
@@ -17,7 +18,12 @@ import { forEach } from 'lodash-es';
 import type { Logger } from 'roarr';
 import { modelConfigsAdapterSelectors, modelsApi } from 'services/api/endpoints/models';
 import type { AnyModelConfig } from 'services/api/types';
-import { isNonRefinerMainModelConfig, isRefinerMainModelModelConfig, isVAEModelConfig } from 'services/api/types';
+import {
+  isNonRefinerMainModelConfig,
+  isRefinerMainModelModelConfig,
+  isSpandrelImageToImageModelConfig,
+  isVAEModelConfig,
+} from 'services/api/types';

 export const addModelsLoadedListener = (startAppListening: AppStartListening) => {
  startAppListening({
@@ -36,6 +42,7 @@ export const addModelsLoadedListener = (startAppListening: AppStartListening) =>
      handleVAEModels(models, state, dispatch, log);
      handleLoRAModels(models, state, dispatch, log);
      handleControlAdapterModels(models, state, dispatch, log);
+      handleSpandrelImageToImageModels(models, state, dispatch, log);
    },
  });
 };
@@ -177,3 +184,25 @@ const handleControlAdapterModels: ModelHandler = (models, state, dispatch, _log)
    dispatch(controlAdapterModelCleared({ id: ca.id }));
  });
 };
+
+const handleSpandrelImageToImageModels: ModelHandler = (models, state, dispatch, _log) => {
+  const { upscaleModel: currentUpscaleModel, postProcessingModel: currentPostProcessingModel } = state.upscale;
+  const upscaleModels = models.filter(isSpandrelImageToImageModelConfig);
+  const firstModel = upscaleModels[0] || null;
+
+  const isCurrentUpscaleModelAvailable = currentUpscaleModel
+    ? upscaleModels.some((m) => m.key === currentUpscaleModel.key)
+    : false;
+
+  if (!isCurrentUpscaleModelAvailable) {
+    dispatch(upscaleModelChanged(firstModel));
+  }
+
+  const isCurrentPostProcessingModelAvailable = currentPostProcessingModel
+    ? upscaleModels.some((m) => m.key === currentPostProcessingModel.key)
+    : false;
+
+  if (!isCurrentPostProcessingModelAvailable) {
+    dispatch(postProcessingModelChanged(firstModel));
+  }
+};
--- a/invokeai/frontend/web/src/app/store/store.ts
+++ b/invokeai/frontend/web/src/app/store/store.ts
@@ -25,7 +25,7 @@ import { nodesPersistConfig, nodesSlice, nodesUndoableConfig } from 'features/no
 import { workflowSettingsPersistConfig, workflowSettingsSlice } from 'features/nodes/store/workflowSettingsSlice';
 import { workflowPersistConfig, workflowSlice } from 'features/nodes/store/workflowSlice';
 import { generationPersistConfig, generationSlice } from 'features/parameters/store/generationSlice';
-import { postprocessingPersistConfig, postprocessingSlice } from 'features/parameters/store/postprocessingSlice';
+import { upscalePersistConfig, upscaleSlice } from 'features/parameters/store/upscaleSlice';
 import { queueSlice } from 'features/queue/store/queueSlice';
 import { sdxlPersistConfig, sdxlSlice } from 'features/sdxl/store/sdxlSlice';
 import { configSlice } from 'features/system/store/configSlice';
@@ -52,7 +52,6 @@ const allReducers = {
  [gallerySlice.name]: gallerySlice.reducer,
  [generationSlice.name]: generationSlice.reducer,
  [nodesSlice.name]: undoable(nodesSlice.reducer, nodesUndoableConfig),
-  [postprocessingSlice.name]: postprocessingSlice.reducer,
  [systemSlice.name]: systemSlice.reducer,
  [configSlice.name]: configSlice.reducer,
  [uiSlice.name]: uiSlice.reducer,
@@ -69,6 +68,7 @@ const allReducers = {
  [controlLayersSlice.name]: undoable(controlLayersSlice.reducer, controlLayersUndoableConfig),
  [workflowSettingsSlice.name]: workflowSettingsSlice.reducer,
  [api.reducerPath]: api.reducer,
+  [upscaleSlice.name]: upscaleSlice.reducer,
 };

 const rootReducer = combineReducers(allReducers);
@@ -102,7 +102,6 @@ const persistConfigs: { [key in keyof typeof allReducers]?: PersistConfig } = {
  [galleryPersistConfig.name]: galleryPersistConfig,
  [generationPersistConfig.name]: generationPersistConfig,
  [nodesPersistConfig.name]: nodesPersistConfig,
-  [postprocessingPersistConfig.name]: postprocessingPersistConfig,
  [systemPersistConfig.name]: systemPersistConfig,
  [workflowPersistConfig.name]: workflowPersistConfig,
  [uiPersistConfig.name]: uiPersistConfig,
@@ -114,6 +113,7 @@ const persistConfigs: { [key in keyof typeof allReducers]?: PersistConfig } = {
  [hrfPersistConfig.name]: hrfPersistConfig,
  [controlLayersPersistConfig.name]: controlLayersPersistConfig,
  [workflowSettingsPersistConfig.name]: workflowSettingsPersistConfig,
+  [upscalePersistConfig.name]: upscalePersistConfig,
 };

 const unserialize: UnserializeFunction = (data, key) => {
--- a/invokeai/frontend/web/src/app/types/invokeai.ts
+++ b/invokeai/frontend/web/src/app/types/invokeai.ts
@@ -72,7 +72,6 @@ export type AppConfig = {
  canRestoreDeletedImagesFromBin: boolean;
  nodesAllowlist: string[] | undefined;
  nodesDenylist: string[] | undefined;
-  maxUpscalePixels?: number;
  metadataFetchDebounce?: number;
  workflowFetchDebounce?: number;
  isLocal?: boolean;
--- a/invokeai/frontend/web/src/common/components/InformationalPopover/InformationalPopover.tsx
+++ b/invokeai/frontend/web/src/common/components/InformationalPopover/InformationalPopover.tsx
@@ -10,9 +10,12 @@ import {
  PopoverContent,
  PopoverTrigger,
  Portal,
+  Spacer,
  Text,
 } from '@invoke-ai/ui-library';
-import { useAppSelector } from 'app/store/storeHooks';
+import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
+import { setShouldEnableInformationalPopovers } from 'features/system/store/systemSlice';
+import { toast } from 'features/toast/toast';
 import { merge, omit } from 'lodash-es';
 import type { ReactElement } from 'react';
 import { memo, useCallback, useMemo } from 'react';
@@ -71,7 +74,7 @@ type ContentProps = {

 const Content = ({ data, feature }: ContentProps) => {
  const { t } = useTranslation();
-
+  const dispatch = useAppDispatch();
  const heading = useMemo<string | undefined>(() => t(`popovers.${feature}.heading`), [feature, t]);

  const paragraphs = useMemo<string[]>(
@@ -82,16 +85,25 @@ const Content = ({ data, feature }: ContentProps) => {
    [feature, t]
  );

-  const handleClick = useCallback(() => {
+  const onClickLearnMore = useCallback(() => {
    if (!data?.href) {
      return;
    }
    window.open(data.href);
  }, [data?.href]);

+  const onClickDontShowMeThese = useCallback(() => {
+    dispatch(setShouldEnableInformationalPopovers(false));
+    toast({
+      title: t('settings.informationalPopoversDisabled'),
+      description: t('settings.informationalPopoversDisabledDesc'),
+      status: 'info',
+    });
+  }, [dispatch, t]);
+
  return (
-    <PopoverContent w={96}>
-      <PopoverCloseButton />
+    <PopoverContent maxW={300}>
+      <PopoverCloseButton top={2} />
      <PopoverBody>
        <Flex gap={2} flexDirection="column" alignItems="flex-start">
          {heading && (
@@ -116,20 +128,19 @@ const Content = ({ data, feature }: ContentProps) => {
          {paragraphs.map((p) => (
            <Text key={p}>{p}</Text>
          ))}
-          {data?.href && (
-            <>
-              <Divider />
-              <Button
-                pt={1}
-                onClick={handleClick}
-                leftIcon={<PiArrowSquareOutBold />}
-                alignSelf="flex-end"
-                variant="link"
-              >
+
+          <Divider />
+          <Flex alignItems="center" justifyContent="space-between" w="full">
+            <Button onClick={onClickDontShowMeThese} variant="link" size="sm">
+              {t('common.dontShowMeThese')}
+            </Button>
+            <Spacer />
+            {data?.href && (
+              <Button onClick={onClickLearnMore} leftIcon={<PiArrowSquareOutBold />} variant="link" size="sm">
                {t('common.learnMore') ?? heading}
              </Button>
-            </>
-          )}
+            )}
+          </Flex>
        </Flex>
      </PopoverBody>
    </PopoverContent>
--- a/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts
+++ b/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts
@@ -53,7 +53,11 @@ export type Feature =
  | 'refinerCfgScale'
  | 'scaleBeforeProcessing'
  | 'seamlessTilingXAxis'
-  | 'seamlessTilingYAxis';
+  | 'seamlessTilingYAxis'
+  | 'upscaleModel'
+  | 'scale'
+  | 'creativity'
+  | 'structure';

 export type PopoverData = PopoverProps & {
  image?: string;
--- a/invokeai/frontend/web/src/common/hooks/useAssertSingleton.ts
+++ b/invokeai/frontend/web/src/common/hooks/useAssertSingleton.ts
@@ -0,0 +1,18 @@
+import { useEffect } from 'react';
+import { assert } from 'tsafe';
+
+const IDS = new Set<string>();
+
+/**
+ * Asserts that there is only one instance of a singleton entity. It can be a hook or a component.
+ * @param id The ID of the singleton entity.
+ */
+export function useAssertSingleton(id: string) {
+  useEffect(() => {
+    assert(!IDS.has(id), `There should be only one instance of ${id}`);
+    IDS.add(id);
+    return () => {
+      IDS.delete(id);
+    };
+  }, [id]);
+}
--- a/invokeai/frontend/web/src/common/hooks/useFullscreenDropzone.ts
+++ b/invokeai/frontend/web/src/common/hooks/useFullscreenDropzone.ts
@@ -21,6 +21,10 @@ const selectPostUploadAction = createMemoizedSelector(activeTabNameSelector, (ac
    postUploadAction = { type: 'SET_CANVAS_INITIAL_IMAGE' };
  }

+  if (activeTabName === 'upscaling') {
+    postUploadAction = { type: 'SET_UPSCALE_INITIAL_IMAGE' };
+  }
+
  return postUploadAction;
 });

--- a/invokeai/frontend/web/src/common/hooks/useIsReadyToEnqueue.ts
+++ b/invokeai/frontend/web/src/common/hooks/useIsReadyToEnqueue.ts
@@ -15,6 +15,7 @@ import type { Templates } from 'features/nodes/store/types';
 import { selectWorkflowSettingsSlice } from 'features/nodes/store/workflowSettingsSlice';
 import { isInvocationNode } from 'features/nodes/types/invocation';
 import { selectGenerationSlice } from 'features/parameters/store/generationSlice';
+import { selectUpscalelice } from 'features/parameters/store/upscaleSlice';
 import { selectSystemSlice } from 'features/system/store/systemSlice';
 import { activeTabNameSelector } from 'features/ui/store/uiSelectors';
 import i18n from 'i18next';
@@ -40,8 +41,19 @@ const createSelector = (templates: Templates) =>
      selectDynamicPromptsSlice,
      selectControlLayersSlice,
      activeTabNameSelector,
+      selectUpscalelice,
    ],
-    (controlAdapters, generation, system, nodes, workflowSettings, dynamicPrompts, controlLayers, activeTabName) => {
+    (
+      controlAdapters,
+      generation,
+      system,
+      nodes,
+      workflowSettings,
+      dynamicPrompts,
+      controlLayers,
+      activeTabName,
+      upscale
+    ) => {
      const { model } = generation;
      const { size } = controlLayers.present;
      const { positivePrompt } = controlLayers.present;
@@ -194,6 +206,16 @@ const createSelector = (templates: Templates) =>
                reasons.push({ prefix, content });
              }
            });
+        } else if (activeTabName === 'upscaling') {
+          if (!upscale.upscaleInitialImage) {
+            reasons.push({ content: i18n.t('upscaling.missingUpscaleInitialImage') });
+          }
+          if (!upscale.upscaleModel) {
+            reasons.push({ content: i18n.t('upscaling.missingUpscaleModel') });
+          }
+          if (!upscale.tileControlnetModel) {
+            reasons.push({ content: i18n.t('upscaling.missingTileControlNetModel') });
+          }
        } else {
          // Handling for all other tabs
          selectControlAdapterAll(controlAdapters)
--- a/invokeai/frontend/web/src/features/dnd/types/index.ts
+++ b/invokeai/frontend/web/src/features/dnd/types/index.ts
@@ -62,6 +62,10 @@ export type CanvasInitialImageDropData = BaseDropData & {
  actionType: 'SET_CANVAS_INITIAL_IMAGE';
 };

+type UpscaleInitialImageDropData = BaseDropData & {
+  actionType: 'SET_UPSCALE_INITIAL_IMAGE';
+};
+
 type NodesImageDropData = BaseDropData & {
  actionType: 'SET_NODES_IMAGE';
  context: {
@@ -98,7 +102,8 @@ export type TypesafeDroppableData =
  | IPALayerImageDropData
  | RGLayerIPAdapterImageDropData
  | IILayerImageDropData
-  | SelectForCompareDropData;
+  | SelectForCompareDropData
+  | UpscaleInitialImageDropData;

 type BaseDragData = {
  id: string;
--- a/invokeai/frontend/web/src/features/dnd/util/isValidDrop.ts
+++ b/invokeai/frontend/web/src/features/dnd/util/isValidDrop.ts
@@ -27,6 +27,8 @@ export const isValidDrop = (overData?: TypesafeDroppableData | null, activeData?
      return payloadType === 'IMAGE_DTO';
    case 'SET_CANVAS_INITIAL_IMAGE':
      return payloadType === 'IMAGE_DTO';
+    case 'SET_UPSCALE_INITIAL_IMAGE':
+      return payloadType === 'IMAGE_DTO';
    case 'SET_NODES_IMAGE':
      return payloadType === 'IMAGE_DTO';
    case 'SELECT_FOR_COMPARE':
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardTooltip.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardTooltip.tsx
@@ -0,0 +1,47 @@
+import { Flex, Image, Text } from '@invoke-ai/ui-library';
+import { skipToken } from '@reduxjs/toolkit/query';
+import { useTranslation } from 'react-i18next';
+import { useGetBoardAssetsTotalQuery, useGetBoardImagesTotalQuery } from 'services/api/endpoints/boards';
+import { useGetImageDTOQuery } from 'services/api/endpoints/images';
+import type { BoardDTO } from 'services/api/types';
+
+type Props = {
+  board: BoardDTO | null;
+};
+
+export const BoardTooltip = ({ board }: Props) => {
+  const { t } = useTranslation();
+  const { imagesTotal } = useGetBoardImagesTotalQuery(board?.board_id || 'none', {
+    selectFromResult: ({ data }) => {
+      return { imagesTotal: data?.total ?? 0 };
+    },
+  });
+  const { assetsTotal } = useGetBoardAssetsTotalQuery(board?.board_id || 'none', {
+    selectFromResult: ({ data }) => {
+      return { assetsTotal: data?.total ?? 0 };
+    },
+  });
+  const { currentData: coverImage } = useGetImageDTOQuery(board?.cover_image_name ?? skipToken);
+
+  return (
+    <Flex flexDir="column" alignItems="center" gap={1}>
+      {coverImage && (
+        <Image
+          src={coverImage.thumbnail_url}
+          draggable={false}
+          objectFit="cover"
+          maxW={150}
+          aspectRatio="1/1"
+          borderRadius="base"
+          borderBottomRadius="lg"
+        />
+      )}
+      <Flex flexDir="column" alignItems="center">
+        <Text noOfLines={1}>
+          {t('boards.imagesWithCount', { count: imagesTotal })}, {t('boards.assetsWithCount', { count: assetsTotal })}
+        </Text>
+        {board?.archived && <Text>({t('boards.archived')})</Text>}
+      </Flex>
+    </Flex>
+  );
+};
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardTotalsTooltip.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardTotalsTooltip.tsx
@@ -1,22 +0,0 @@
-import { useTranslation } from 'react-i18next';
-import { useGetBoardAssetsTotalQuery, useGetBoardImagesTotalQuery } from 'services/api/endpoints/boards';
-
-type Props = {
-  board_id: string;
-  isArchived: boolean;
-};
-
-export const BoardTotalsTooltip = ({ board_id, isArchived }: Props) => {
-  const { t } = useTranslation();
-  const { imagesTotal } = useGetBoardImagesTotalQuery(board_id, {
-    selectFromResult: ({ data }) => {
-      return { imagesTotal: data?.total ?? 0 };
-    },
-  });
-  const { assetsTotal } = useGetBoardAssetsTotalQuery(board_id, {
-    selectFromResult: ({ data }) => {
-      return { assetsTotal: data?.total ?? 0 };
-    },
-  });
-  return `${t('boards.imagesWithCount', { count: imagesTotal })}, ${t('boards.assetsWithCount', { count: assetsTotal })}${isArchived ? ` (${t('boards.archived')})` : ''}`;
-};
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardsList.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardsList.tsx
@@ -1,13 +1,10 @@
-import { Box, Flex, Text } from '@invoke-ai/ui-library';
+import { Button, Collapse, Flex, Icon, Text, useDisclosure } from '@invoke-ai/ui-library';
 import { EMPTY_ARRAY } from 'app/store/constants';
 import { useAppSelector } from 'app/store/storeHooks';
-import { overlayScrollbarsParams } from 'common/components/OverlayScrollbars/constants';
-import DeleteBoardModal from 'features/gallery/components/Boards/DeleteBoardModal';
 import { selectListBoardsQueryArgs } from 'features/gallery/store/gallerySelectors';
-import { OverlayScrollbarsComponent } from 'overlayscrollbars-react';
-import type { CSSProperties } from 'react';
-import { memo, useMemo, useState } from 'react';
+import { useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
+import { PiCaretDownBold } from 'react-icons/pi';
 import { useListAllBoardsQuery } from 'services/api/endpoints/boards';
 import type { BoardDTO } from 'services/api/types';

@@ -15,101 +12,111 @@ import AddBoardButton from './AddBoardButton';
 import GalleryBoard from './GalleryBoard';
 import NoBoardBoard from './NoBoardBoard';

-const overlayScrollbarsStyles: CSSProperties = {
-  height: '100%',
-  width: '100%',
+type Props = {
+  isPrivate: boolean;
+  setBoardToDelete: (board?: BoardDTO) => void;
 };

-const BoardsList = () => {
+export const BoardsList = ({ isPrivate, setBoardToDelete }: Props) => {
+  const { t } = useTranslation();
  const selectedBoardId = useAppSelector((s) => s.gallery.selectedBoardId);
  const boardSearchText = useAppSelector((s) => s.gallery.boardSearchText);
-  const allowPrivateBoards = useAppSelector((s) => s.config.allowPrivateBoards);
  const queryArgs = useAppSelector(selectListBoardsQueryArgs);
  const { data: boards } = useListAllBoardsQuery(queryArgs);
-  const [boardToDelete, setBoardToDelete] = useState<BoardDTO>();
-  const { t } = useTranslation();
+  const allowPrivateBoards = useAppSelector((s) => s.config.allowPrivateBoards);
+  const { isOpen, onToggle } = useDisclosure({ defaultIsOpen: true });

-  const { filteredPrivateBoards, filteredSharedBoards } = useMemo(() => {
-    const filteredBoards = boardSearchText
-      ? boards?.filter((board) => board.board_name.toLowerCase().includes(boardSearchText.toLowerCase()))
-      : boards;
-    const filteredPrivateBoards = filteredBoards?.filter((board) => board.is_private) ?? EMPTY_ARRAY;
-    const filteredSharedBoards = filteredBoards?.filter((board) => !board.is_private) ?? EMPTY_ARRAY;
-    return { filteredPrivateBoards, filteredSharedBoards };
-  }, [boardSearchText, boards]);
+  const filteredBoards = useMemo(() => {
+    if (!boards) {
+      return EMPTY_ARRAY;
+    }
+
+    return boards.filter((board) => {
+      if (boardSearchText.length) {
+        return board.is_private === isPrivate && board.board_name.toLowerCase().includes(boardSearchText.toLowerCase());
+      } else {
+        return board.is_private === isPrivate;
+      }
+    });
+  }, [boardSearchText, boards, isPrivate]);
+
+  const boardElements = useMemo(() => {
+    const elements = [];
+    if (allowPrivateBoards && isPrivate && !boardSearchText.length) {
+      elements.push(<NoBoardBoard key="none" isSelected={selectedBoardId === 'none'} />);
+    }
+
+    if (!allowPrivateBoards && !boardSearchText.length) {
+      elements.push(<NoBoardBoard key="none" isSelected={selectedBoardId === 'none'} />);
+    }
+
+    filteredBoards.forEach((board) => {
+      elements.push(
+        <GalleryBoard
+          board={board}
+          isSelected={selectedBoardId === board.board_id}
+          setBoardToDelete={setBoardToDelete}
+          key={board.board_id}
+        />
+      );
+    });
+
+    return elements;
+  }, [allowPrivateBoards, isPrivate, boardSearchText.length, filteredBoards, selectedBoardId, setBoardToDelete]);
+
+  const boardListTitle = useMemo(() => {
+    if (allowPrivateBoards) {
+      return isPrivate ? t('boards.private') : t('boards.shared');
+    } else {
+      return t('boards.boards');
+    }
+  }, [isPrivate, allowPrivateBoards, t]);

  return (
-    <>
-      <Box position="relative" w="full" h="full">
-        <Box position="absolute" top={0} right={0} bottom={0} left={0}>
-          <OverlayScrollbarsComponent defer style={overlayScrollbarsStyles} options={overlayScrollbarsParams.options}>
-            {allowPrivateBoards && (
-              <Flex direction="column" gap={1}>
-                <Flex
-                  position="sticky"
-                  w="full"
-                  justifyContent="space-between"
-                  alignItems="center"
-                  ps={2}
-                  pb={1}
-                  pt={2}
-                  zIndex={1}
-                  top={0}
-                  bg="base.900"
-                >
-                  <Text fontSize="md" fontWeight="semibold" userSelect="none">
-                    {t('boards.private')}
-                  </Text>
-                  <AddBoardButton isPrivateBoard={true} />
-                </Flex>
-                <Flex direction="column" gap={1}>
-                  <NoBoardBoard isSelected={selectedBoardId === 'none'} />
-                  {filteredPrivateBoards.map((board) => (
-                    <GalleryBoard
-                      board={board}
-                      isSelected={selectedBoardId === board.board_id}
-                      setBoardToDelete={setBoardToDelete}
-                      key={board.board_id}
-                    />
-                  ))}
-                </Flex>
-              </Flex>
-            )}
-            <Flex direction="column" gap={1}>
-              <Flex
-                position="sticky"
-                w="full"
-                justifyContent="space-between"
-                alignItems="center"
-                ps={2}
-                pb={1}
-                pt={2}
-                zIndex={1}
-                top={0}
-                bg="base.900"
-              >
-                <Text fontSize="md" fontWeight="semibold" userSelect="none">
-                  {allowPrivateBoards ? t('boards.shared') : t('boards.boards')}
-                </Text>
-                <AddBoardButton isPrivateBoard={false} />
-              </Flex>
-              <Flex direction="column" gap={1}>
-                {!allowPrivateBoards && <NoBoardBoard isSelected={selectedBoardId === 'none'} />}
-                {filteredSharedBoards.map((board) => (
-                  <GalleryBoard
-                    board={board}
-                    isSelected={selectedBoardId === board.board_id}
-                    setBoardToDelete={setBoardToDelete}
-                    key={board.board_id}
-                  />
-                ))}
-              </Flex>
+    <Flex direction="column">
+      <Flex
+        position="sticky"
+        w="full"
+        justifyContent="space-between"
+        alignItems="center"
+        ps={2}
+        py={1}
+        zIndex={1}
+        top={0}
+        bg="base.900"
+      >
+        {allowPrivateBoards ? (
+          <Button variant="unstyled" onClick={onToggle}>
+            <Flex gap="2" alignItems="center">
+              <Icon
+                boxSize={4}
+                as={PiCaretDownBold}
+                transform={isOpen ? undefined : 'rotate(-90deg)'}
+                fill="base.500"
+              />
+              <Text fontSize="sm" fontWeight="semibold" userSelect="none" color="base.500">
+                {boardListTitle}
+              </Text>
            </Flex>
-          </OverlayScrollbarsComponent>
-        </Box>
-      </Box>
-      <DeleteBoardModal boardToDelete={boardToDelete} setBoardToDelete={setBoardToDelete} />
-    </>
+          </Button>
+        ) : (
+          <Text fontSize="sm" fontWeight="semibold" userSelect="none" color="base.500">
+            {boardListTitle}
+          </Text>
+        )}
+        <AddBoardButton isPrivateBoard={isPrivate} />
+      </Flex>
+      <Collapse in={isOpen}>
+        <Flex direction="column" gap={1}>
+          {boardElements.length ? (
+            boardElements
+          ) : (
+            <Text variant="subtext" textAlign="center">
+              {t('boards.noBoards', { boardType: boardSearchText.length ? 'Matching' : '' })}
+            </Text>
+          )}
+        </Flex>
+      </Collapse>
+    </Flex>
  );
 };
-export default memo(BoardsList);
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardsListWrapper.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardsListWrapper.tsx
@@ -0,0 +1,45 @@
+import { Box } from '@invoke-ai/ui-library';
+import { useAppSelector } from 'app/store/storeHooks';
+import { overlayScrollbarsParams } from 'common/components/OverlayScrollbars/constants';
+import DeleteBoardModal from 'features/gallery/components/Boards/DeleteBoardModal';
+import { OverlayScrollbarsComponent } from 'overlayscrollbars-react';
+import type { CSSProperties } from 'react';
+import { memo, useState } from 'react';
+import type { BoardDTO } from 'services/api/types';
+
+import { BoardsList } from './BoardsList';
+
+// Module-level constant, so the same style object is passed to the scroll container on every render.
+const overlayScrollbarsStyles: CSSProperties = {
+  height: '100%',
+  width: '100%',
+};
+
+/**
+ * Scrollable container for the private and non-private board lists.
+ *
+ * The private list is rendered only when `config.allowPrivateBoards` is truthy. The
+ * `boardToDelete` state lives here so that both lists and `DeleteBoardModal` share it.
+ */
+const BoardsListWrapper = () => {
+  const isPrivateListEnabled = useAppSelector((state) => state.config.allowPrivateBoards);
+  const [boardToDelete, setBoardToDelete] = useState<BoardDTO>();
+
+  const privateBoardsList = isPrivateListEnabled && <BoardsList isPrivate setBoardToDelete={setBoardToDelete} />;
+
+  return (
+    <>
+      <Box position="relative" w="full" h="full">
+        <Box position="absolute" top={0} right={0} bottom={0} left={0}>
+          <OverlayScrollbarsComponent defer style={overlayScrollbarsStyles} options={overlayScrollbarsParams.options}>
+            {privateBoardsList}
+            <BoardsList isPrivate={false} setBoardToDelete={setBoardToDelete} />
+          </OverlayScrollbarsComponent>
+        </Box>
+      </Box>
+      <DeleteBoardModal boardToDelete={boardToDelete} setBoardToDelete={setBoardToDelete} />
+    </>
+  );
+};
+
+export default memo(BoardsListWrapper);
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardsSearch.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/BoardsSearch.tsx
@@ -40,7 +40,7 @@ const BoardsSearch = () => {
  );

  return (
-    <InputGroup pt={2}>
+    <InputGroup>
      <Input
        placeholder={t('boards.searchBoard')}
        value={boardSearchText}
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/GalleryBoard.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/GalleryBoard.tsx
@@ -17,7 +17,7 @@ import IAIDroppable from 'common/components/IAIDroppable';
 import type { AddToBoardDropData } from 'features/dnd/types';
 import { AutoAddBadge } from 'features/gallery/components/Boards/AutoAddBadge';
 import BoardContextMenu from 'features/gallery/components/Boards/BoardContextMenu';
-import { BoardTotalsTooltip } from 'features/gallery/components/Boards/BoardsList/BoardTotalsTooltip';
+import { BoardTooltip } from 'features/gallery/components/Boards/BoardsList/BoardTooltip';
 import { autoAddBoardIdChanged, boardIdSelected } from 'features/gallery/store/gallerySlice';
 import type { MouseEvent, MouseEventHandler, MutableRefObject } from 'react';
 import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react';
@@ -115,12 +115,7 @@ const GalleryBoard = ({ board, isSelected, setBoardToDelete }: GalleryBoardProps
  return (
    <BoardContextMenu board={board} setBoardToDelete={setBoardToDelete}>
      {(ref) => (
-        <Tooltip
-          label={<BoardTotalsTooltip board_id={board.board_id} isArchived={Boolean(board.archived)} />}
-          openDelay={1000}
-          placement="left"
-          closeOnScroll
-        >
+        <Tooltip label={<BoardTooltip board={board} />} openDelay={1000} placement="left" closeOnScroll p={2}>
          <Flex
            position="relative"
            ref={ref}
@@ -131,10 +126,12 @@ const GalleryBoard = ({ board, isSelected, setBoardToDelete }: GalleryBoardProps
            borderRadius="base"
            cursor="pointer"
            py={1}
-            px={2}
-            gap={2}
+            ps={1}
+            pe={4}
+            gap={4}
            bg={isSelected ? 'base.850' : undefined}
            _hover={_hover}
+            h={12}
          >
            <CoverImage board={board} />
            <Editable
@@ -149,17 +146,17 @@ const GalleryBoard = ({ board, isSelected, setBoardToDelete }: GalleryBoardProps
              onChange={onChange}
              onSubmit={onSubmit}
              isPreviewFocusable={false}
+              fontSize="sm"
            >
              <EditablePreview
                cursor="pointer"
                p={0}
-                fontSize="md"
+                fontSize="sm"
                textOverflow="ellipsis"
                noOfLines={1}
                w="fit-content"
                wordBreak="break-all"
-                color={isSelected ? 'base.100' : 'base.400'}
-                fontWeight={isSelected ? 'semibold' : 'normal'}
+                fontWeight={isSelected ? 'bold' : 'normal'}
              />
              <EditableInput sx={editableInputStyles} />
              <JankEditableHijack onStartEditingRef={onStartEditingRef} />
@@ -168,7 +165,7 @@ const GalleryBoard = ({ board, isSelected, setBoardToDelete }: GalleryBoardProps
            {board.archived && !editingDisclosure.isOpen && <Icon as={PiArchiveBold} fill="base.300" />}
            {!editingDisclosure.isOpen && <Text variant="subtext">{board.image_count}</Text>}

-            <IAIDroppable data={droppableData} dropLabel={<Text fontSize="md">{t('unifiedCanvas.move')}</Text>} />
+            <IAIDroppable data={droppableData} dropLabel={<Text fontSize="lg">{t('unifiedCanvas.move')}</Text>} />
          </Flex>
        </Tooltip>
      )}
@@ -197,8 +194,8 @@ const CoverImage = ({ board }: { board: BoardDTO }) => {
        src={coverImage.thumbnail_url}
        draggable={false}
        objectFit="cover"
-        w={8}
-        h={8}
+        w={10}
+        h={10}
        borderRadius="base"
        borderBottomRadius="lg"
      />
@@ -206,8 +203,8 @@ const CoverImage = ({ board }: { board: BoardDTO }) => {
  }

  return (
-    <Flex w={8} h={8} justifyContent="center" alignItems="center">
-      <Icon boxSize={8} as={PiImageSquare} opacity={0.7} color="base.500" />
+    <Flex w={10} h={10} justifyContent="center" alignItems="center">
+      <Icon boxSize={10} as={PiImageSquare} opacity={0.7} color="base.500" />
    </Flex>
  );
 };
--- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/NoBoardBoard.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardsList/NoBoardBoard.tsx
@@ -4,7 +4,7 @@ import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
 import IAIDroppable from 'common/components/IAIDroppable';
 import type { RemoveFromBoardDropData } from 'features/dnd/types';
 import { AutoAddBadge } from 'features/gallery/components/Boards/AutoAddBadge';
-import { BoardTotalsTooltip } from 'features/gallery/components/Boards/BoardsList/BoardTotalsTooltip';
+import { BoardTooltip } from 'features/gallery/components/Boards/BoardsList/BoardTooltip';
 import NoBoardBoardContextMenu from 'features/gallery/components/Boards/NoBoardBoardContextMenu';
 import { autoAddBoardIdChanged, boardIdSelected } from 'features/gallery/store/gallerySlice';
 import { memo, useCallback, useMemo } from 'react';
@@ -46,25 +46,16 @@ const NoBoardBoard = memo(({ isSelected }: Props) => {
    []
  );

-  const filteredOut = useMemo(() => {
-    return boardSearchText ? !boardName.toLowerCase().includes(boardSearchText.toLowerCase()) : false;
-  }, [boardName, boardSearchText]);
-
  const { t } = useTranslation();

-  if (filteredOut) {
+  if (boardSearchText.length) {
    return null;
  }

  return (
    <NoBoardBoardContextMenu>
      {(ref) => (
-        <Tooltip
-          label={<BoardTotalsTooltip board_id="none" isArchived={false} />}
-          openDelay={1000}
-          placement="left"
-          closeOnScroll
-        >
+        <Tooltip label={<BoardTooltip board={null} />} openDelay={1000} placement="left" closeOnScroll>
          <Flex
            position="relative"
            ref={ref}
@@ -73,15 +64,17 @@ const NoBoardBoard = memo(({ isSelected }: Props) => {
            alignItems="center"
            borderRadius="base"
            cursor="pointer"
-            px={2}
            py={1}
-            gap={2}
+            ps={1}
+            pe={4}
+            gap={4}
            bg={isSelected ? 'base.850' : undefined}
            _hover={_hover}
+            h={12}
          >
-            <Flex w={8} h={8} justifyContent="center" alignItems="center">
+            <Flex w="10" justifyContent="space-around">
              {/* iconified from public/assets/images/invoke-symbol-wht-lrg.svg */}
-              <Icon boxSize={6} opacity={1} stroke="base.500" viewBox="0 0 66 66" fill="none">
+              <Icon boxSize={8} opacity={1} stroke="base.500" viewBox="0 0 66 66" fill="none">
                <path
                  d="M43.9137 16H63.1211V3H3.12109V16H22.3285L43.9137 50H63.1211V63H3.12109V50H22.3285"
                  strokeWidth="5"
@@ -89,18 +82,12 @@ const NoBoardBoard = memo(({ isSelected }: Props) => {
              </Icon>
            </Flex>

-            <Text
-              fontSize="md"
-              color={isSelected ? 'base.100' : 'base.400'}
-              fontWeight={isSelected ? 'semibold' : 'normal'}
-              noOfLines={1}
-              flexGrow={1}
-            >
+            <Text fontSize="sm" fontWeight={isSelected ? 'bold' : 'normal'} noOfLines={1} flexGrow={1}>
              {boardName}
            </Text>
            {autoAddBoardId === 'none' && <AutoAddBadge />}
            <Text variant="subtext">{imagesTotal}</Text>
-            <IAIDroppable data={droppableData} dropLabel={<Text fontSize="md">{t('unifiedCanvas.move')}</Text>} />
+            <IAIDroppable data={droppableData} dropLabel={<Text fontSize="lg">{t('unifiedCanvas.move')}</Text>} />
          </Flex>
        </Tooltip>
      )}
--- a/invokeai/frontend/web/src/features/gallery/components/Gallery.tsx
+++ b/invokeai/frontend/web/src/features/gallery/components/Gallery.tsx
@@ -0,0 +1,118 @@
+import type { ChakraProps } from '@invoke-ai/ui-library';
+import {
+  Box,
+  Collapse,
+  Flex,
+  IconButton,
+  Spacer,
+  Tab,
+  TabList,
+  Tabs,
+  Text,
+  useDisclosure,
+} from '@invoke-ai/ui-library';
+import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
+import { useGallerySearchTerm } from 'features/gallery/components/ImageGrid/useGallerySearchTerm';
+import { galleryViewChanged } from 'features/gallery/store/gallerySlice';
+import type { CSSProperties } from 'react';
+import { useCallback } from 'react';
+import { useTranslation } from 'react-i18next';
+import { PiMagnifyingGlassBold } from 'react-icons/pi';
+import { useBoardName } from 'services/api/hooks/useBoardName';
+
+import GalleryImageGrid from './ImageGrid/GalleryImageGrid';
+import { GalleryPagination } from './ImageGrid/GalleryPagination';
+import { GallerySearch } from './ImageGrid/GallerySearch';
+
+// Base `sx` styles applied to both gallery view tabs.
+const BASE_STYLES: ChakraProps['sx'] = {
+  fontWeight: 'semibold',
+  fontSize: 'sm',
+  color: 'base.300',
+};
+
+// Passed as `_selected` to both gallery view tabs.
+const SELECTED_STYLES: ChakraProps['sx'] = {
+  borderColor: 'base.800',
+  borderBottomColor: 'base.900',
+  color: 'invokeBlue.300',
+};
+
+// Inline style for the `Collapse` that wraps the search box.
+const COLLAPSE_STYLES: CSSProperties = { flexShrink: 0, minHeight: 0 };
+
+/**
+ * Gallery panel: shows the selected board's name, the images/assets view tabs, a
+ * collapsible search box, the image grid and the pagination controls.
+ */
+export const Gallery = () => {
+  const { t } = useTranslation();
+  const dispatch = useAppDispatch();
+  const galleryView = useAppSelector((s) => s.gallery.galleryView);
+  const initialSearchTerm = useAppSelector((s) => s.gallery.searchTerm);
+  // The search box starts open when a search term is already set. `isOpen`/`onToggle` are
+  // destructured so `handleClickSearch` depends on the toggle callback rather than on the
+  // disclosure object, which would invalidate its memoization if it is rebuilt each render.
+  const { isOpen: isSearchOpen, onToggle: onToggleSearch } = useDisclosure({
+    defaultIsOpen: initialSearchTerm.length > 0,
+  });
+  const [searchTerm, onChangeSearchTerm, onResetSearchTerm] = useGallerySearchTerm();
+
+  const handleClickImages = useCallback(() => {
+    dispatch(galleryViewChanged('images'));
+  }, [dispatch]);
+
+  const handleClickAssets = useCallback(() => {
+    dispatch(galleryViewChanged('assets'));
+  }, [dispatch]);
+
+  // Toggling the search box in either direction also resets the search term.
+  const handleClickSearch = useCallback(() => {
+    onToggleSearch();
+    onResetSearchTerm();
+  }, [onResetSearchTerm, onToggleSearch]);
+
+  const selectedBoardId = useAppSelector((s) => s.gallery.selectedBoardId);
+  const boardName = useBoardName(selectedBoardId);
+
+  return (
+    <Flex flexDirection="column" alignItems="center" justifyContent="space-between" h="full" w="full" pt={1}>
+      <Tabs index={galleryView === 'images' ? 0 : 1} variant="enclosed" display="flex" flexDir="column" w="full">
+        <TabList gap={2} fontSize="sm" borderColor="base.800" alignItems="center" w="full">
+          <Text fontSize="sm" fontWeight="semibold" noOfLines={1} px="2">
+            {boardName}
+          </Text>
+          <Spacer />
+          <Tab sx={BASE_STYLES} _selected={SELECTED_STYLES} onClick={handleClickImages} data-testid="images-tab">
+            {t('parameters.images')}
+          </Tab>
+          <Tab sx={BASE_STYLES} _selected={SELECTED_STYLES} onClick={handleClickAssets} data-testid="assets-tab">
+            {t('gallery.assets')}
+          </Tab>
+          <IconButton
+            onClick={handleClickSearch}
+            tooltip={isSearchOpen ? t('gallery.exitSearch') : t('gallery.displaySearch')}
+            aria-label={t('gallery.displaySearch')}
+            icon={<PiMagnifyingGlassBold />}
+            colorScheme={isSearchOpen ? 'invokeBlue' : 'base'}
+            variant="link"
+          />
+        </TabList>
+      </Tabs>
+
+      <Box w="full">
+        <Collapse in={isSearchOpen} style={COLLAPSE_STYLES}>
+          <Box w="full" pt={2}>
+            <GallerySearch
+              searchTerm={searchTerm}
+              onChangeSearchTerm={onChangeSearchTerm}
+              onResetSearchTerm={onResetSearchTerm}
+            />
+          </Box>
+        </Collapse>
+      </Box>
+      <GalleryImageGrid />
+      <GalleryPagination />
+    </Flex>
+  );
+};
--- a/Show More
+++ b/Show More