From df77a12efe41003da81523aff613a488c2a97cf5 Mon Sep 17 00:00:00 2001
From: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
Date: Mon, 4 Aug 2025 21:08:11 +1000
Subject: [PATCH] refactor(backend): use torchvision transforms for Kontext
 image preprocessing

Replace numpy-based normalization with torchvision transforms for
consistency with other image processing in the codebase
---
 .../flux/extensions/kontext_extension.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/flux/extensions/kontext_extension.py b/invokeai/backend/flux/extensions/kontext_extension.py
index f9d1cccf27..a5324b64d1 100644
--- a/invokeai/backend/flux/extensions/kontext_extension.py
+++ b/invokeai/backend/flux/extensions/kontext_extension.py
@@ -1,6 +1,5 @@
-import einops
-import numpy as np
 import torch
+import torchvision.transforms as T
 from einops import repeat
 from PIL import Image
 
@@ -136,10 +135,17 @@ class KontextExtension:
         # Use BICUBIC for smoother resizing to reduce artifacts
         image = image.resize((final_width, final_height), Image.Resampling.BICUBIC)
 
-        # Convert to tensor with same normalization as BFL
-        image_np = np.array(image)
-        image_tensor = torch.from_numpy(image_np).float() / 127.5 - 1.0
-        image_tensor = einops.rearrange(image_tensor, "h w c -> 1 c h w")
+        # Convert to tensor using torchvision transforms for consistency
+        # This matches the normalization used in image_resized_to_grid_as_tensor
+        transformation = T.Compose(
+            [
+                T.ToTensor(),  # Converts PIL image to tensor and scales to [0, 1]
+            ]
+        )
+        image_tensor = transformation(image)
+        # Convert from [0, 1] to [-1, 1] range expected by VAE
+        image_tensor = image_tensor * 2.0 - 1.0
+        image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension
         image_tensor = image_tensor.to(self._device)
 
         # Continue with VAE encoding
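
Note (not part of the patch): the torchvision path should be numerically
equivalent to the removed numpy normalization, since ToTensor() divides
uint8 values by 255 and the subsequent "* 2.0 - 1.0" reproduces the old
"/ 127.5 - 1.0" scaling. A minimal sketch checking this equivalence; the
random test image and the variable names (rgb, old_tensor, new_tensor)
are hypothetical and not taken from the codebase:

    import numpy as np
    import torch
    import torchvision.transforms as T
    from PIL import Image

    # Hypothetical 8-bit RGB test image
    rgb = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
    image = Image.fromarray(rgb)

    # Old path: scale uint8 so 0 -> -1.0 and 255 -> 1.0, then "h w c -> 1 c h w"
    old_tensor = torch.from_numpy(np.array(image)).float() / 127.5 - 1.0
    old_tensor = old_tensor.permute(2, 0, 1).unsqueeze(0)

    # New path: ToTensor() scales uint8 to [0, 1] and returns (c, h, w),
    # then map to [-1, 1] and add the batch dimension
    new_tensor = T.ToTensor()(image) * 2.0 - 1.0
    new_tensor = new_tensor.unsqueeze(0)

    # The two results agree up to floating-point rounding
    print(torch.allclose(old_tensor, new_tensor, atol=1e-6))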