Compare commits

..

8 Commits

13 changed files with 146 additions and 31 deletions

14
SECURITY.md Normal file
View File

@@ -0,0 +1,14 @@
# Security Policy
## Supported Versions
Only the latest version of Invoke will receive security updates.
We do not currently maintain multiple versions of the application with updates.
## Reporting a Vulnerability
To report a vulnerability, contact the Invoke team directly at security@invoke.ai
At this time, we do not maintain a formal bug bounty program.
You can also share identified security issues with our team on huntr.com

View File

@@ -334,8 +334,6 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
dtype=inference_dtype,
)
# activities = [torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
# with torch.profiler.profile(activities=activities, record_shapes=True, with_stack=True) as prof:
x = denoise(
model=transformer,
img=x,
@@ -355,7 +353,6 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
pos_ip_adapter_extensions=pos_ip_adapter_extensions,
neg_ip_adapter_extensions=neg_ip_adapter_extensions,
)
# prof.export_chrome_trace("trace.json")
x = unpack(x.float(), self.height, self.width)
return x

View File

@@ -5,7 +5,7 @@ import torch
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, UIComponent
from invokeai.app.invocations.model import CLIPField, T5EncoderField
from invokeai.app.invocations.primitives import FluxConditioningOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
@@ -41,7 +41,10 @@ class FluxTextEncoderInvocation(BaseInvocation):
t5_max_seq_len: Literal[256, 512] = InputField(
description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models."
)
prompt: str = InputField(description="Text prompt to encode.")
prompt: str = InputField(
description="Text prompt to encode.",
ui_component=UIComponent.Textarea,
)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> FluxConditioningOutput:

View File

@@ -0,0 +1,59 @@
from pydantic import ValidationInfo, field_validator
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
Classification,
invocation,
invocation_output,
)
from invokeai.app.invocations.fields import InputField, OutputField
from invokeai.app.services.shared.invocation_context import InvocationContext
@invocation_output("image_panel_coordinate_output")
class ImagePanelCoordinateOutput(BaseInvocationOutput):
x_left: int = OutputField(description="The left x-coordinate of the panel.")
y_top: int = OutputField(description="The top y-coordinate of the panel.")
width: int = OutputField(description="The width of the panel.")
height: int = OutputField(description="The height of the panel.")
@invocation(
"image_panel_layout",
title="Image Panel Layout",
tags=["image", "panel", "layout"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class ImagePanelLayoutInvocation(BaseInvocation):
"""Get the coordinates of a single panel in a grid. (If the full image shape cannot be divided evenly into panels,
then the grid may not cover the entire image.)
"""
width: int = InputField(description="The width of the entire grid.")
height: int = InputField(description="The height of the entire grid.")
num_cols: int = InputField(ge=1, default=1, description="The number of columns in the grid.")
num_rows: int = InputField(ge=1, default=1, description="The number of rows in the grid.")
panel_col_idx: int = InputField(ge=0, default=0, description="The column index of the panel to be processed.")
panel_row_idx: int = InputField(ge=0, default=0, description="The row index of the panel to be processed.")
@field_validator("panel_col_idx")
def validate_panel_col_idx(cls, v: int, info: ValidationInfo) -> int:
if v < 0 or v >= info.data["num_cols"]:
raise ValueError(f"panel_col_idx must be between 0 and {info.data['num_cols'] - 1}")
return v
@field_validator("panel_row_idx")
def validate_panel_row_idx(cls, v: int, info: ValidationInfo) -> int:
if v < 0 or v >= info.data["num_rows"]:
raise ValueError(f"panel_row_idx must be between 0 and {info.data['num_rows'] - 1}")
return v
def invoke(self, context: InvocationContext) -> ImagePanelCoordinateOutput:
x_left = self.panel_col_idx * (self.width // self.num_cols)
y_top = self.panel_row_idx * (self.height // self.num_rows)
width = self.width // self.num_cols
height = self.height // self.num_rows
return ImagePanelCoordinateOutput(x_left=x_left, y_top=y_top, width=width, height=height)

View File

@@ -86,7 +86,7 @@ class ModelLoadService(ModelLoadServiceBase):
def torch_load_file(checkpoint: Path) -> AnyModel:
scan_result = scan_file_path(checkpoint)
if scan_result.infected_files != 0:
if scan_result.infected_files != 0 or scan_result.scan_err:
raise Exception("The model at {checkpoint} is potentially infected by malware. Aborting load.")
result = torch_load(checkpoint, map_location="cpu")
return result

View File

@@ -16,17 +16,20 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
assert dim % 2 == 0
scale = torch.arange(0, dim, 2, dtype=pos.dtype, device=pos.device) / dim
scale = (
torch.arange(0, dim, 2, dtype=torch.float32 if pos.device.type == "mps" else torch.float64, device=pos.device)
/ dim
)
omega = 1.0 / (theta**scale)
out = torch.einsum("...n,d->...nd", pos, omega)
out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
return out
return out.float()
def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
xq_ = xq.view(*xq.shape[:-1], -1, 1, 2)
xk_ = xk.view(*xk.shape[:-1], -1, 1, 2)
xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
return xq_out.view(*xq.shape), xk_out.view(*xk.shape)
return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)

View File

@@ -66,7 +66,10 @@ class RMSNorm(torch.nn.Module):
self.scale = nn.Parameter(torch.ones(dim))
def forward(self, x: Tensor):
return torch.nn.functional.rms_norm(x, self.scale.shape, self.scale, eps=1e-6)
x_dtype = x.dtype
x = x.float()
rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
return (x * rrms).to(dtype=x_dtype) * self.scale
class QKNorm(torch.nn.Module):

View File

@@ -17,9 +17,23 @@ class DepthAnythingPipeline(RawModel):
self._pipeline = pipeline
def generate_depth(self, image: Image.Image) -> Image.Image:
depth_map = self._pipeline(image)["depth"]
assert isinstance(depth_map, Image.Image)
return depth_map
pipeline_result = self._pipeline(image)
predicted_depth = pipeline_result["predicted_depth"]
assert isinstance(predicted_depth, torch.Tensor)
# Convert to PIL Image.
# Note: The pipeline already returns a PIL Image (pipeline_result["depth"]), but it contains artifacts as
# described here: https://github.com/invoke-ai/InvokeAI/issues/7358.
# We implement custom post-processing logic to avoid the artifacts.
prediction = torch.nn.functional.interpolate(
predicted_depth.unsqueeze(1), size=image.size[::-1], mode="bilinear", align_corners=False
)
prediction = prediction / prediction.max()
output = prediction.squeeze().cpu().numpy()
output = (output * 255).clip(0, 255)
formatted = output.astype("uint8")
depth = Image.fromarray(formatted)
return depth
def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
if device is not None and device.type not in {"cpu", "cuda"}:

View File

@@ -469,7 +469,7 @@ class ModelProbe(object):
"""
# scan model
scan_result = scan_file_path(checkpoint)
if scan_result.infected_files != 0:
if scan_result.infected_files != 0 or scan_result.scan_err:
raise Exception("The model {model_name} is potentially infected by malware. Aborting import.")

View File

@@ -44,7 +44,7 @@ def _fast_safetensors_reader(path: str) -> Dict[str, torch.Tensor]:
return checkpoint
def read_checkpoint_meta(path: Union[str, Path], scan: bool = False) -> Dict[str, torch.Tensor]:
def read_checkpoint_meta(path: Union[str, Path], scan: bool = True) -> Dict[str, torch.Tensor]:
if str(path).endswith(".safetensors"):
try:
path_str = path.as_posix() if isinstance(path, Path) else path
@@ -55,7 +55,7 @@ def read_checkpoint_meta(path: Union[str, Path], scan: bool = False) -> Dict[str
else:
if scan:
scan_result = scan_file_path(path)
if scan_result.infected_files != 0:
if scan_result.infected_files != 0 or scan_result.scan_err:
raise Exception(f'The model file "{path}" is potentially infected by malware. Aborting import.')
if str(path).endswith(".gguf"):
# The GGUF reader used here uses numpy memmap, so these tensors are not loaded into memory during this function

View File

@@ -1319,8 +1319,9 @@
"controlNetBeginEnd": {
"heading": "Begin / End Step Percentage",
"paragraphs": [
"The part of the of the denoising process that will have the Control Adapter applied.",
"Generally, Control Adapters applied at the start of the process guide composition, and Control Adapters applied at the end guide details."
"This setting determines which portion of the denoising (generation) process incorporates the guidance from this layer.",
"• Start Step (%): Specifies when to begin applying the guidance from this layer during the generation process.",
"• End Step (%): Specifies when to stop applying this layer's guidance and revert general guidance from the model and other settings."
]
},
"controlNetControlMode": {
@@ -1338,13 +1339,15 @@
"paragraphs": ["Method to fit Control Adapter's input image size to the output generation size."]
},
"ipAdapterMethod": {
"heading": "Method",
"paragraphs": ["Method by which to apply the current IP Adapter."]
"heading": "Mode",
"paragraphs": ["The mode defines how the reference image will guide the generation process."]
},
"controlNetWeight": {
"heading": "Weight",
"paragraphs": [
"Weight of the Control Adapter. Higher weight will lead to larger impacts on the final image."
"Adjusts how strongly the layer influences the generation process",
"• Higher Weight (.75-2): Creates a more significant impact on the final result.",
"• Lower Weight (0-.75): Creates a smaller impact on the final result."
]
},
"dynamicPrompts": {
@@ -1803,10 +1806,13 @@
"megaControl": "Mega Control"
},
"ipAdapterMethod": {
"ipAdapterMethod": "IP Adapter Method",
"ipAdapterMethod": "Mode",
"full": "Style and Composition",
"fullDesc": "Applies visual style (colors, textures) & composition (layout, structure).",
"style": "Style Only",
"composition": "Composition Only"
"styleDesc": "Applies visual style (colors, textures) without considering its layout.",
"composition": "Composition Only",
"compositionDesc": "Replicates layout & structure while ignoring the reference's style."
},
"fill": {
"fillColor": "Fill Color",

View File

@@ -1,8 +1,10 @@
import type { ComboboxOnChange } from '@invoke-ai/ui-library';
import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library';
import { useAppSelector } from 'app/store/storeHooks';
import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
import type { IPMethodV2 } from 'features/controlLayers/store/types';
import { isIPMethodV2 } from 'features/controlLayers/store/types';
import { selectSystemShouldEnableModelDescriptions } from 'features/system/store/systemSlice';
import { memo, useCallback, useMemo } from 'react';
import { useTranslation } from 'react-i18next';
import { assert } from 'tsafe';
@@ -14,13 +16,27 @@ type Props = {
export const IPAdapterMethod = memo(({ method, onChange }: Props) => {
const { t } = useTranslation();
const shouldShowModelDescriptions = useAppSelector(selectSystemShouldEnableModelDescriptions);
const options: { label: string; value: IPMethodV2 }[] = useMemo(
() => [
{ label: t('controlLayers.ipAdapterMethod.full'), value: 'full' },
{ label: t('controlLayers.ipAdapterMethod.style'), value: 'style' },
{ label: t('controlLayers.ipAdapterMethod.composition'), value: 'composition' },
{
label: t('controlLayers.ipAdapterMethod.full'),
value: 'full',
description: shouldShowModelDescriptions ? t('controlLayers.ipAdapterMethod.fullDesc') : undefined,
},
{
label: t('controlLayers.ipAdapterMethod.style'),
value: 'style',
description: shouldShowModelDescriptions ? t('controlLayers.ipAdapterMethod.styleDesc') : undefined,
},
{
label: t('controlLayers.ipAdapterMethod.composition'),
value: 'composition',
description: shouldShowModelDescriptions ? t('controlLayers.ipAdapterMethod.compositionDesc') : undefined,
},
],
[t]
[t, shouldShowModelDescriptions]
);
const _onChange = useCallback<ComboboxOnChange>(
(v) => {

View File

@@ -37,8 +37,8 @@ export const gettingStartedVideos: VideoData[] = [
},
{
tKey: 'creatingAndComposingOnInvokesControlCanvas',
link: 'https://www.youtube.com/watch?v=MohWv5GZVGM&list=PLvWK1Kc8iXGrQy8r9TYg6QdUuJ5MMx-ZO&index=5&t=28s&pp=iAQB',
length: { minutes: 13, seconds: 56 },
link: 'https://www.youtube.com/watch?v=O4LaFcYFxlA',
length: { minutes: 2, seconds: 52 },
},
{
tKey: 'upscaling',