Update DepthAnything post-processing logic to avoid artifacts caused by numerical overflow.

2026-04-23 03:00:31 -04:00 · 2024-11-27 14:54:30 +00:00
parent 8cfb032051
commit e22f0f2203
1 changed file with 17 additions and 3 deletions
--- a/invokeai/backend/image_util/depth_anything/depth_anything_pipeline.py
+++ b/invokeai/backend/image_util/depth_anything/depth_anything_pipeline.py
@@ -17,9 +17,23 @@ class DepthAnythingPipeline(RawModel):
        self._pipeline = pipeline

    def generate_depth(self, image: Image.Image) -> Image.Image:
-        depth_map = self._pipeline(image)["depth"]
-        assert isinstance(depth_map, Image.Image)
-        return depth_map
+        pipeline_result = self._pipeline(image)
+        predicted_depth = pipeline_result["predicted_depth"]
+        assert isinstance(predicted_depth, torch.Tensor)  # Raw depth tensor; converted to an image below.
+
+        # Convert the raw prediction to a PIL Image ourselves.
+        # Note: The pipeline already returns a PIL Image (pipeline_result["depth"]), but it contains artifacts as
+        # described here: https://github.com/invoke-ai/InvokeAI/issues/7358.
+        # We implement custom post-processing logic to avoid the artifacts: resize, normalize by max, scale to 8-bit.
+        prediction = torch.nn.functional.interpolate(  # PIL size is (width, height); [::-1] gives (height, width).
+            predicted_depth.unsqueeze(1), size=image.size[::-1], mode="bilinear", align_corners=False
+        )
+        prediction = prediction / prediction.max()  # NOTE(review): divides by zero if max() is 0 - confirm.
+        output = prediction.squeeze().cpu().numpy()  # Drop size-1 dims and move to a NumPy array on the CPU.
+        output = (output * 255).clip(0, 255)  # Scale after normalizing, then clamp to the uint8 range.
+        formatted = output.astype("uint8")
+        depth = Image.fromarray(formatted)
+        return depth

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        if device is not None and device.type not in {"cpu", "cuda"}: