From ea54a2655a4e0b709da1d7c038b48012eeab259b Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Tue, 8 Oct 2024 10:43:40 -0400
Subject: [PATCH] Add a workaround for broken sliced attention on MPS with
 torch 2.4.1.

---
 invokeai/backend/stable_diffusion/diffusers_pipeline.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index 756a5816ab..311a44c2a1 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -204,6 +204,15 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             else:
                 raise Exception("torch-sdp attention slicing not available")
 
+        # See https://github.com/invoke-ai/InvokeAI/issues/7049 for context.
+        # Bumping torch from 2.2.2 to 2.4.1 caused the sliced attention implementation to produce incorrect results.
+        # For now, if a user is on an MPS device and has not explicitly set the attention_type, then we select the
+        # non-sliced torch-sdp implementation. This keeps things working on MPS at the cost of increased peak memory
+        # utilization.
+        if torch.backends.mps.is_available():
+            assert hasattr(torch.nn.functional, "scaled_dot_product_attention")
+            return
+
         # the remainder if this code is called when attention_type=='auto'
         if self.unet.device.type == "cuda":
             if is_xformers_available() and prefer_xformers:
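
For reference, the following is a minimal, standalone sketch of the selection behavior this patch introduces, assuming an auto-selection entry point like the one described in the commit message. The helper name select_attention_backend is hypothetical and is not part of InvokeAI's API.

import torch


def select_attention_backend(requested: str = "auto") -> str:
    """Sketch of the MPS workaround: prefer torch-sdp over sliced attention on MPS."""
    if requested != "auto":
        # An explicitly configured attention_type is respected unchanged.
        return requested

    # Workaround for https://github.com/invoke-ai/InvokeAI/issues/7049: with torch 2.4.1,
    # sliced attention produces incorrect results on MPS, so select the non-sliced
    # torch-sdp implementation when it is available.
    if torch.backends.mps.is_available():
        assert hasattr(torch.nn.functional, "scaled_dot_product_attention")
        return "torch-sdp"

    # Other devices fall through to the existing auto-selection logic.
    return "auto"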