Fix an edge case with model offloading from VRAM to RAM. If a GGML-quantized model is offloaded from VRAM inside a torch.inference_mode() context, the offload fails with 'RuntimeError: Cannot set version_counter for inference tensor'. The Spandrel image-to-image invocations now decorate invoke() with torch.no_grad() instead of torch.inference_mode() to avoid this.

2026-04-23 03:00:31 -04:00 · 2025-01-07 15:59:50 +00:00
parent 67e948b50d
commit 85eb4f0312
1 changed files with 2 additions and 2 deletions
--- a/invokeai/app/invocations/spandrel_image_to_image.py
+++ b/invokeai/app/invocations/spandrel_image_to_image.py
@@ -151,7 +151,7 @@ class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):

        return pil_image

-    @torch.inference_mode()
+    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
        # revisit this.
@@ -197,7 +197,7 @@ class SpandrelImageToImageAutoscaleInvocation(SpandrelImageToImageInvocation):
        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
    )

-    @torch.inference_mode()
+    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
        # revisit this.