From df77a12efe41003da81523aff613a488c2a97cf5 Mon Sep 17 00:00:00 2001
From: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
Date: Mon, 4 Aug 2025 21:08:11 +1000
Subject: [PATCH] refactor(backend): use torchvision transforms for Kontext
 image preprocessing

Replace numpy-based normalization with torchvision transforms for
consistency with other image processing in the codebase
---
 .../flux/extensions/kontext_extension.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/flux/extensions/kontext_extension.py b/invokeai/backend/flux/extensions/kontext_extension.py
index f9d1cccf27..a5324b64d1 100644
--- a/invokeai/backend/flux/extensions/kontext_extension.py
+++ b/invokeai/backend/flux/extensions/kontext_extension.py
@@ -1,6 +1,5 @@
-import einops
-import numpy as np
 import torch
+import torchvision.transforms as T
 from einops import repeat
 from PIL import Image
 
@@ -136,10 +135,17 @@ class KontextExtension:
         # Use BICUBIC for smoother resizing to reduce artifacts
         image = image.resize((final_width, final_height), Image.Resampling.BICUBIC)
 
-        # Convert to tensor with same normalization as BFL
-        image_np = np.array(image)
-        image_tensor = torch.from_numpy(image_np).float() / 127.5 - 1.0
-        image_tensor = einops.rearrange(image_tensor, "h w c -> 1 c h w")
+        # Convert to tensor using torchvision transforms for consistency
+        # This matches the normalization used in image_resized_to_grid_as_tensor
+        transformation = T.Compose(
+            [
+                T.ToTensor(),  # Converts PIL image to tensor and scales to [0, 1]
+            ]
+        )
+        image_tensor = transformation(image)
+        # Convert from [0, 1] to [-1, 1] range expected by VAE
+        image_tensor = image_tensor * 2.0 - 1.0
+        image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension
         image_tensor = image_tensor.to(self._device)
 
         # Continue with VAE encoding
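
Note (not part of the patch): the torchvision path should be numerically
equivalent to the removed numpy normalization, since ToTensor() divides
uint8 values by 255 and the subsequent "* 2.0 - 1.0" reproduces the old
"/ 127.5 - 1.0" scaling. A minimal sketch checking this equivalence; the
random test image and the variable names (rgb, old_tensor, new_tensor)
are hypothetical and not taken from the codebase:

    import numpy as np
    import torch
    import torchvision.transforms as T
    from PIL import Image

    # Hypothetical 8-bit RGB test image
    rgb = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
    image = Image.fromarray(rgb)

    # Old path: scale uint8 so 0 -> -1.0 and 255 -> 1.0, then "h w c -> 1 c h w"
    old_tensor = torch.from_numpy(np.array(image)).float() / 127.5 - 1.0
    old_tensor = old_tensor.permute(2, 0, 1).unsqueeze(0)

    # New path: ToTensor() scales uint8 to [0, 1] and returns (c, h, w),
    # then map to [-1, 1] and add the batch dimension
    new_tensor = T.ToTensor()(image) * 2.0 - 1.0
    new_tensor = new_tensor.unsqueeze(0)

    # The two results agree up to floating-point rounding
    print(torch.allclose(old_tensor, new_tensor, atol=1e-6))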