Remove gradient checkpointing from DiffusersControlNetFlux.

Ryan Dick
2024-10-04 15:14:39 +00:00
committed by Kent Keirsey
parent 70495665c5
commit 7a22819100
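For context: the code removed below implements activation (gradient) checkpointing, which saves memory during training by recomputing each transformer block's activations in the backward pass instead of caching them. The following is a minimal, self-contained sketch of that general pattern only, not the InvokeAI implementation; TinyBlock and TinyModel are hypothetical stand-ins for the real transformer blocks.

import torch
import torch.nn as nn
import torch.utils.checkpoint


class TinyBlock(nn.Module):
    """Hypothetical stand-in for a transformer block; any nn.Module works here."""

    def __init__(self, dim: int):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.proj(hidden_states))


class TinyModel(nn.Module):
    """Hypothetical model mirroring the per-block checkpointing toggle removed by this commit."""

    def __init__(self, dim: int = 64, depth: int = 4):
        super().__init__()
        self.blocks = nn.ModuleList([TinyBlock(dim) for _ in range(depth)])
        self.gradient_checkpointing = False  # set by the caller to enable checkpointing

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        for block in self.blocks:
            if self.training and self.gradient_checkpointing:
                # Recompute this block's activations during backward instead of
                # storing them; the non-reentrant variant is generally preferred.
                hidden_states = torch.utils.checkpoint.checkpoint(
                    block, hidden_states, use_reentrant=False
                )
            else:
                hidden_states = block(hidden_states)
        return hidden_states


if __name__ == "__main__":
    model = TinyModel().train()
    model.gradient_checkpointing = True
    x = torch.randn(2, 16, 64, requires_grad=True)
    model(x).sum().backward()  # gradients flow through the checkpointed blocks

Dropping the toggle, as this commit does for DiffusersControlNetFlux, removes the training-only branch: the module then always runs the plain block call and caches activations like any other nn.Module.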


@@ -6,7 +6,7 @@
 # 2. We need to sort out https://github.com/invoke-ai/InvokeAI/pull/6740 before we can bump the diffusers package.
 from dataclasses import dataclass
-from typing import Any, Dict, List, Tuple, Union
+from typing import List, Tuple, Union
 import torch
 import torch.nn as nn
@@ -23,8 +23,6 @@ class FluxControlNetOutput(BaseOutput):
 class DiffusersControlNetFlux(ModelMixin, ConfigMixin):
-    _supports_gradient_checkpointing = True
     @register_to_config
     def __init__(
         self,
@@ -92,12 +90,6 @@ class DiffusersControlNetFlux(ModelMixin, ConfigMixin):
         self.controlnet_x_embedder = zero_module(torch.nn.Linear(in_channels, self.inner_dim))
-        self.gradient_checkpointing = False
-    def _set_gradient_checkpointing(self, module, value=False):
-        if hasattr(module, "gradient_checkpointing"):
-            module.gradient_checkpointing = value
     @classmethod
     def from_transformer(
         cls,
@@ -217,66 +209,23 @@ class DiffusersControlNetFlux(ModelMixin, ConfigMixin):
         block_samples = ()
         for index_block, block in enumerate(self.transformer_blocks):
-            if self.training and self.gradient_checkpointing:
-                def create_custom_forward(module, return_dict=None):
-                    def custom_forward(*inputs):
-                        if return_dict is not None:
-                            return module(*inputs, return_dict=return_dict)
-                        else:
-                            return module(*inputs)
-                    return custom_forward
-                ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
-                encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
-                    create_custom_forward(block),
-                    hidden_states,
-                    encoder_hidden_states,
-                    temb,
-                    image_rotary_emb,
-                    **ckpt_kwargs,
-                )
-            else:
-                encoder_hidden_states, hidden_states = block(
-                    hidden_states=hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    temb=temb,
-                    image_rotary_emb=image_rotary_emb,
-                )
+            encoder_hidden_states, hidden_states = block(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                temb=temb,
+                image_rotary_emb=image_rotary_emb,
+            )
             block_samples = block_samples + (hidden_states,)
         hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)
         single_block_samples = ()
         for index_block, block in enumerate(self.single_transformer_blocks):
-            if self.training and self.gradient_checkpointing:
-                def create_custom_forward(module, return_dict=None):
-                    def custom_forward(*inputs):
-                        if return_dict is not None:
-                            return module(*inputs, return_dict=return_dict)
-                        else:
-                            return module(*inputs)
-                    return custom_forward
-                ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
-                hidden_states = torch.utils.checkpoint.checkpoint(
-                    create_custom_forward(block),
-                    hidden_states,
-                    temb,
-                    image_rotary_emb,
-                    **ckpt_kwargs,
-                )
-            else:
-                hidden_states = block(
-                    hidden_states=hidden_states,
-                    temb=temb,
-                    image_rotary_emb=image_rotary_emb,
-                )
+            hidden_states = block(
+                hidden_states=hidden_states,
+                temb=temb,
+                image_rotary_emb=image_rotary_emb,
+            )
             single_block_samples = single_block_samples + (hidden_states[:, encoder_hidden_states.shape[1] :],)
         # controlnet block