Update DepthAnything post-processing logic to avoid artifacts caused by numerical overflow.

Add utility ImagePanelLayoutInvocation for working with In-Context LoRA workflows.
Use a Textarea component for the FluxTextEncoderInvocation prompt field.
2026-01-19 20:58:32 -05:00 · 2024-11-27 14:54:30 +00:00 · 2024-11-26 20:58:31 -08:00 · 2024-11-26 20:58:31 -08:00 · 2024-11-26 16:17:12 -05:00 · 2024-11-26 11:25:53 -05:00
13 changed files with 146 additions and 31 deletions
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,14 @@
+# Security Policy
+
+## Supported Versions
+
+Only the latest version of Invoke will receive security updates. 
+We do not currently maintain multiple versions of the application with updates.
+
+## Reporting a Vulnerability
+
+To report a vulnerability, contact the Invoke team directly at security@invoke.ai
+
+At this time, we do not maintain a formal bug bounty program. 
+
+You can also share identified security issues with our team on huntr.com
--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -334,8 +334,6 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                dtype=inference_dtype,
            )

-            # activities = [torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
-            # with torch.profiler.profile(activities=activities, record_shapes=True, with_stack=True) as prof:
            x = denoise(
                model=transformer,
                img=x,
@@ -355,7 +353,6 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                pos_ip_adapter_extensions=pos_ip_adapter_extensions,
                neg_ip_adapter_extensions=neg_ip_adapter_extensions,
            )
-            # prof.export_chrome_trace("trace.json")

        x = unpack(x.float(), self.height, self.width)
        return x
--- a/invokeai/app/invocations/flux_text_encoder.py
+++ b/invokeai/app/invocations/flux_text_encoder.py
@@ -5,7 +5,7 @@ import torch
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer

 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, UIComponent
 from invokeai.app.invocations.model import CLIPField, T5EncoderField
 from invokeai.app.invocations.primitives import FluxConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
@@ -41,7 +41,10 @@ class FluxTextEncoderInvocation(BaseInvocation):
    t5_max_seq_len: Literal[256, 512] = InputField(
        description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models."
    )
-    prompt: str = InputField(description="Text prompt to encode.")
+    prompt: str = InputField(
+        description="Text prompt to encode.",
+        ui_component=UIComponent.Textarea,
+    )

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> FluxConditioningOutput:
--- a/invokeai/app/invocations/image_panels.py
+++ b/invokeai/app/invocations/image_panels.py
@@ -0,0 +1,59 @@
+from pydantic import ValidationInfo, field_validator
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    Classification,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import InputField, OutputField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+
+
+@invocation_output("image_panel_coordinate_output")
+class ImagePanelCoordinateOutput(BaseInvocationOutput):
+    x_left: int = OutputField(description="The left x-coordinate of the panel.")
+    y_top: int = OutputField(description="The top y-coordinate of the panel.")
+    width: int = OutputField(description="The width of the panel.")
+    height: int = OutputField(description="The height of the panel.")
+
+
+@invocation(
+    "image_panel_layout",
+    title="Image Panel Layout",
+    tags=["image", "panel", "layout"],
+    category="image",
+    version="1.0.0",
+    classification=Classification.Prototype,
+)
+class ImagePanelLayoutInvocation(BaseInvocation):
+    """Get the coordinates of a single panel in a grid. (If the full image shape cannot be divided evenly into panels,
+    then the grid may not cover the entire image.)
+    """
+
+    width: int = InputField(description="The width of the entire grid.")
+    height: int = InputField(description="The height of the entire grid.")
+    num_cols: int = InputField(ge=1, default=1, description="The number of columns in the grid.")
+    num_rows: int = InputField(ge=1, default=1, description="The number of rows in the grid.")
+    panel_col_idx: int = InputField(ge=0, default=0, description="The column index of the panel to be processed.")
+    panel_row_idx: int = InputField(ge=0, default=0, description="The row index of the panel to be processed.")
+
+    @field_validator("panel_col_idx")
+    def validate_panel_col_idx(cls, v: int, info: ValidationInfo) -> int:
+        if v < 0 or v >= info.data["num_cols"]:
+            raise ValueError(f"panel_col_idx must be between 0 and {info.data['num_cols'] - 1}")
+        return v
+
+    @field_validator("panel_row_idx")
+    def validate_panel_row_idx(cls, v: int, info: ValidationInfo) -> int:
+        if v < 0 or v >= info.data["num_rows"]:
+            raise ValueError(f"panel_row_idx must be between 0 and {info.data['num_rows'] - 1}")
+        return v
+
+    def invoke(self, context: InvocationContext) -> ImagePanelCoordinateOutput:
+        x_left = self.panel_col_idx * (self.width // self.num_cols)
+        y_top = self.panel_row_idx * (self.height // self.num_rows)
+        width = self.width // self.num_cols
+        height = self.height // self.num_rows
+        return ImagePanelCoordinateOutput(x_left=x_left, y_top=y_top, width=width, height=height)
--- a/invokeai/app/services/model_load/model_load_default.py
+++ b/invokeai/app/services/model_load/model_load_default.py
@@ -86,7 +86,7 @@ class ModelLoadService(ModelLoadServiceBase):

        def torch_load_file(checkpoint: Path) -> AnyModel:
            scan_result = scan_file_path(checkpoint)
-            if scan_result.infected_files != 0:
+            if scan_result.infected_files != 0 or scan_result.scan_err:
                raise Exception("The model at {checkpoint} is potentially infected by malware. Aborting load.")
            result = torch_load(checkpoint, map_location="cpu")
            return result
--- a/invokeai/backend/flux/math.py
+++ b/invokeai/backend/flux/math.py
@@ -16,17 +16,20 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:

 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    assert dim % 2 == 0
-    scale = torch.arange(0, dim, 2, dtype=pos.dtype, device=pos.device) / dim
+    scale = (
+        torch.arange(0, dim, 2, dtype=torch.float32 if pos.device.type == "mps" else torch.float64, device=pos.device)
+        / dim
+    )
    omega = 1.0 / (theta**scale)
    out = torch.einsum("...n,d->...nd", pos, omega)
    out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
-    return out
+    return out.float()


 def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
-    xq_ = xq.view(*xq.shape[:-1], -1, 1, 2)
-    xk_ = xk.view(*xk.shape[:-1], -1, 1, 2)
+    xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
+    xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
    xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
    xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
-    return xq_out.view(*xq.shape), xk_out.view(*xk.shape)
+    return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
--- a/invokeai/backend/flux/modules/layers.py
+++ b/invokeai/backend/flux/modules/layers.py
@@ -66,7 +66,10 @@ class RMSNorm(torch.nn.Module):
        self.scale = nn.Parameter(torch.ones(dim))

    def forward(self, x: Tensor):
-        return torch.nn.functional.rms_norm(x, self.scale.shape, self.scale, eps=1e-6)
+        x_dtype = x.dtype
+        x = x.float()
+        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
+        return (x * rrms).to(dtype=x_dtype) * self.scale


 class QKNorm(torch.nn.Module):
--- a/invokeai/backend/image_util/depth_anything/depth_anything_pipeline.py
+++ b/invokeai/backend/image_util/depth_anything/depth_anything_pipeline.py
@@ -17,9 +17,23 @@ class DepthAnythingPipeline(RawModel):
        self._pipeline = pipeline

    def generate_depth(self, image: Image.Image) -> Image.Image:
-        depth_map = self._pipeline(image)["depth"]
-        assert isinstance(depth_map, Image.Image)
-        return depth_map
+        pipeline_result = self._pipeline(image)
+        predicted_depth = pipeline_result["predicted_depth"]
+        assert isinstance(predicted_depth, torch.Tensor)
+
+        # Convert to PIL Image.
+        # Note: The pipeline already returns a PIL Image (pipeline_result["depth"]), but it contains artifacts as
+        # described here: https://github.com/invoke-ai/InvokeAI/issues/7358.
+        # We implement custom post-processing logic to avoid the artifacts.
+        prediction = torch.nn.functional.interpolate(
+            predicted_depth.unsqueeze(1), size=image.size[::-1], mode="bilinear", align_corners=False
+        )
+        prediction = prediction / prediction.max()
+        output = prediction.squeeze().cpu().numpy()
+        output = (output * 255).clip(0, 255)
+        formatted = output.astype("uint8")
+        depth = Image.fromarray(formatted)
+        return depth

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        if device is not None and device.type not in {"cpu", "cuda"}:
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -469,7 +469,7 @@ class ModelProbe(object):
        """
        # scan model
        scan_result = scan_file_path(checkpoint)
-        if scan_result.infected_files != 0:
+        if scan_result.infected_files != 0 or scan_result.scan_err:
            raise Exception("The model {model_name} is potentially infected by malware. Aborting import.")


--- a/invokeai/backend/model_manager/util/model_util.py
+++ b/invokeai/backend/model_manager/util/model_util.py
@@ -44,7 +44,7 @@ def _fast_safetensors_reader(path: str) -> Dict[str, torch.Tensor]:
    return checkpoint


-def read_checkpoint_meta(path: Union[str, Path], scan: bool = False) -> Dict[str, torch.Tensor]:
+def read_checkpoint_meta(path: Union[str, Path], scan: bool = True) -> Dict[str, torch.Tensor]:
    if str(path).endswith(".safetensors"):
        try:
            path_str = path.as_posix() if isinstance(path, Path) else path
@@ -55,7 +55,7 @@ def read_checkpoint_meta(path: Union[str, Path], scan: bool = False) -> Dict[str
    else:
        if scan:
            scan_result = scan_file_path(path)
-            if scan_result.infected_files != 0:
+            if scan_result.infected_files != 0 or scan_result.scan_err:
                raise Exception(f'The model file "{path}" is potentially infected by malware. Aborting import.')
        if str(path).endswith(".gguf"):
            # The GGUF reader used here uses numpy memmap, so these tensors are not loaded into memory during this function
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -1319,8 +1319,9 @@
        "controlNetBeginEnd": {
            "heading": "Begin / End Step Percentage",
            "paragraphs": [
-                "The part of the of the denoising process that will have the Control Adapter applied.",
-                "Generally, Control Adapters applied at the start of the process guide composition, and Control Adapters applied at the end guide details."
+                "This setting determines which portion of the denoising (generation) process incorporates the guidance from this layer.",
+                "• Start Step (%): Specifies when to begin applying the guidance from this layer during the generation process.",
+                "• End Step (%): Specifies when to stop applying this layer's guidance and revert to the general guidance from the model and other settings."
            ]
        },
        "controlNetControlMode": {
@@ -1338,13 +1339,15 @@
            "paragraphs": ["Method to fit Control Adapter's input image size to the output generation size."]
        },
        "ipAdapterMethod": {
-            "heading": "Method",
-            "paragraphs": ["Method by which to apply the current IP Adapter."]
+            "heading": "Mode",
+            "paragraphs": ["The mode defines how the reference image will guide the generation process."]
        },
        "controlNetWeight": {
            "heading": "Weight",
            "paragraphs": [
-                "Weight of the Control Adapter. Higher weight will lead to larger impacts on the final image."
+                "Adjusts how strongly the layer influences the generation process.",
+                "• Higher Weight (.75-2): Creates a more significant impact on the final result.",
+                "• Lower Weight (0-.75): Creates a smaller impact on the final result."
            ]
        },
        "dynamicPrompts": {
@@ -1803,10 +1806,13 @@
            "megaControl": "Mega Control"
        },
        "ipAdapterMethod": {
-            "ipAdapterMethod": "IP Adapter Method",
+            "ipAdapterMethod": "Mode",
            "full": "Style and Composition",
+            "fullDesc": "Applies visual style (colors, textures) & composition (layout, structure).",
            "style": "Style Only",
-            "composition": "Composition Only"
+            "styleDesc": "Applies visual style (colors, textures) without considering its layout.",
+            "composition": "Composition Only",
+            "compositionDesc": "Replicates layout & structure while ignoring the reference's style."
        },
        "fill": {
            "fillColor": "Fill Color",
--- a/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterMethod.tsx
+++ b/invokeai/frontend/web/src/features/controlLayers/components/IPAdapter/IPAdapterMethod.tsx
@@ -1,8 +1,10 @@
 import type { ComboboxOnChange } from '@invoke-ai/ui-library';
 import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library';
+import { useAppSelector } from 'app/store/storeHooks';
 import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
 import type { IPMethodV2 } from 'features/controlLayers/store/types';
 import { isIPMethodV2 } from 'features/controlLayers/store/types';
+import { selectSystemShouldEnableModelDescriptions } from 'features/system/store/systemSlice';
 import { memo, useCallback, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
 import { assert } from 'tsafe';
@@ -14,13 +16,27 @@ type Props = {

 export const IPAdapterMethod = memo(({ method, onChange }: Props) => {
  const { t } = useTranslation();
+  const shouldShowModelDescriptions = useAppSelector(selectSystemShouldEnableModelDescriptions);
+
  const options: { label: string; value: IPMethodV2 }[] = useMemo(
    () => [
-      { label: t('controlLayers.ipAdapterMethod.full'), value: 'full' },
-      { label: t('controlLayers.ipAdapterMethod.style'), value: 'style' },
-      { label: t('controlLayers.ipAdapterMethod.composition'), value: 'composition' },
+      {
+        label: t('controlLayers.ipAdapterMethod.full'),
+        value: 'full',
+        description: shouldShowModelDescriptions ? t('controlLayers.ipAdapterMethod.fullDesc') : undefined,
+      },
+      {
+        label: t('controlLayers.ipAdapterMethod.style'),
+        value: 'style',
+        description: shouldShowModelDescriptions ? t('controlLayers.ipAdapterMethod.styleDesc') : undefined,
+      },
+      {
+        label: t('controlLayers.ipAdapterMethod.composition'),
+        value: 'composition',
+        description: shouldShowModelDescriptions ? t('controlLayers.ipAdapterMethod.compositionDesc') : undefined,
+      },
    ],
-    [t]
+    [t, shouldShowModelDescriptions]
  );
  const _onChange = useCallback<ComboboxOnChange>(
    (v) => {
--- a/invokeai/frontend/web/src/features/system/components/VideosModal/data.ts
+++ b/invokeai/frontend/web/src/features/system/components/VideosModal/data.ts
@@ -37,8 +37,8 @@ export const gettingStartedVideos: VideoData[] = [
  },
  {
    tKey: 'creatingAndComposingOnInvokesControlCanvas',
-    link: 'https://www.youtube.com/watch?v=MohWv5GZVGM&list=PLvWK1Kc8iXGrQy8r9TYg6QdUuJ5MMx-ZO&index=5&t=28s&pp=iAQB',
-    length: { minutes: 13, seconds: 56 },
+    link: 'https://www.youtube.com/watch?v=O4LaFcYFxlA',
+    length: { minutes: 2, seconds: 52 },
  },
  {
    tKey: 'upscaling',
Author	SHA1	Message	Date
Ryan Dick	e22f0f2203	Update DepthAnything post-processing logic to avoid artifacts caused by numerical overflow.	2024-11-27 14:54:30 +00:00
Ryan Dick	8cfb032051	Add utility ImagePanelLayoutInvocation for working with In-Context LoRA workflows.	2024-11-26 20:58:31 -08:00
Ryan Dick	06a9d4e2b2	Use a Textarea component for the FluxTextEncoderInvocation prompt field.	2024-11-26 20:58:31 -08:00
Brandon Rising	ed46acee79	fix: Fail scan on InvalidMagicError in picklescan, update default for read_checkpoint_meta to scan unless explicitly told not to	2024-11-26 16:17:12 -05:00
Mary Hipp	965cd76e33	lint fix	2024-11-26 11:25:53 -05:00
Mary Hipp	e5e8cbf34c	shorten reference image mode descriptions;	2024-11-26 11:25:53 -05:00
Mary Hipp	3412a52594	(ui): updates various informational tooltips, adds descriptons to IP adapter method options	2024-11-26 11:25:53 -05:00
Kent Keirsey	059336258f	Create SECURITY.md	2024-11-25 04:10:03 -08:00