make ip_adapters work with stable-fast

resolve conflicts
ruff fixes
2026-01-16 15:37:55 -05:00 · 2023-12-21 17:29:28 -05:00 · 2023-12-21 16:31:42 -05:00 · 2023-12-21 16:29:32 -05:00 · 2023-12-21 16:28:42 -05:00 · 2023-12-21 16:10:52 -05:00
10 changed files with 61 additions and 15 deletions
--- a/docs/contributing/INVOCATIONS.md
+++ b/docs/contributing/INVOCATIONS.md
@@ -11,7 +11,7 @@ complex functionality.

 InvokeAI Nodes can be found in the `invokeai/app/invocations` directory. These can be used as examples to create your own nodes.

-New nodes should be added to a subfolder in the `nodes` directory found at the root level of the InvokeAI installation location. Nodes added to this folder will be imported upon application startup. 
+New nodes should be added to a subfolder in the `nodes` directory found at the root level of the InvokeAI installation location. Nodes added to this folder will be available for use upon application startup.

 Example `nodes`  subfolder structure:
 ```py
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@@ -271,6 +271,7 @@ class InvokeAIAppConfig(InvokeAISettings):
    attention_slice_size: Literal["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', json_schema_extra=Categories.Generation)
    force_tiled_decode  : bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", json_schema_extra=Categories.Generation)
    png_compress_level  : int = Field(default=6, description="The compress_level setting of PIL.Image.save(), used for PNG encoding. All settings are lossless. 0 = fastest, largest filesize, 9 = slowest, smallest filesize", json_schema_extra=Categories.Generation)
+    stable_fast         : bool = Field(default=True, description="Enable stable-fast performance optimizations, if the library is installed and functional", json_schema_extra=Categories.Generation)

    # QUEUE
    max_queue_size      : int = Field(default=10000, gt=0, description="Maximum number of items in the session queue", json_schema_extra=Categories.Queue)
--- a/invokeai/backend/ip_adapter/attention_processor.py
+++ b/invokeai/backend/ip_adapter/attention_processor.py
@@ -141,7 +141,6 @@ class IPAttnProcessor2_0(torch.nn.Module):
                ip_hidden_states = ipa_embed

                # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding)
-
                ip_key = ipa_weights.to_k_ip(ip_hidden_states)
                ip_value = ipa_weights.to_v_ip(ip_hidden_states)

--- a/invokeai/backend/ip_adapter/ip_attention_weights.py
+++ b/invokeai/backend/ip_adapter/ip_attention_weights.py
@@ -12,6 +12,8 @@ class IPAttentionProcessorWeights(torch.nn.Module):
        super().__init__()
        self.to_k_ip = torch.nn.Linear(in_dim, out_dim, bias=False)
        self.to_v_ip = torch.nn.Linear(in_dim, out_dim, bias=False)
+        for param in self.parameters():
+            param.requires_grad = False


 class IPAttentionWeights(torch.nn.Module):
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -24,12 +24,14 @@ import sys
 import time
 from contextlib import suppress
 from dataclasses import dataclass, field
+from importlib.util import find_spec
 from pathlib import Path
 from typing import Any, Dict, Optional, Type, Union, types

 import torch

 import invokeai.backend.util.logging as logger
+from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.model_management.memory_snapshot import MemorySnapshot, get_pretty_snapshot_diff
 from invokeai.backend.model_management.model_load_optimizations import skip_torch_weight_init

@@ -39,6 +41,26 @@ from .models import BaseModelType, ModelBase, ModelType, SubModelType
 if choose_torch_device() == torch.device("mps"):
    from torch import mps

+SFAST_AVAILABLE = False
+TRITON_AVAILABLE = False
+XFORMERS_AVAILABLE = False
+SFAST_CONFIG = None
+
+TRITON_AVAILABLE = find_spec("triton") is not None
+XFORMERS_AVAILABLE = find_spec("xformers") is not None
+
+try:
+    from sfast.compilers.diffusion_pipeline_compiler import CompilationConfig, compile_unet, compile_vae
+
+    SFAST_CONFIG = CompilationConfig.Default()
+    SFAST_CONFIG.enable_cuda_graph = True
+    SFAST_CONFIG.enable_xformers = XFORMERS_AVAILABLE
+    SFAST_CONFIG.enable_triton = TRITON_AVAILABLE
+    SFAST_AVAILABLE = True
+except ImportError:
+    pass
+
+
 # Maximum size of the cache, in gigs
 # Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
 DEFAULT_MAX_CACHE_SIZE = 6.0
@@ -110,6 +132,7 @@ class _CacheRecord:
 class ModelCache(object):
    def __init__(
        self,
+        app_config: InvokeAIAppConfig,
        max_cache_size: float = DEFAULT_MAX_CACHE_SIZE,
        max_vram_cache_size: float = DEFAULT_MAX_VRAM_CACHE_SIZE,
        execution_device: torch.device = torch.device("cuda"),
@@ -122,6 +145,7 @@ class ModelCache(object):
        log_memory_usage: bool = False,
    ):
        """
+        :param app_config: InvokeAIAppConfig for application
        :param max_cache_size: Maximum size of the RAM cache [6.0 GB]
        :param execution_device: Torch device to load active model into [torch.device('cuda')]
        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
@@ -135,6 +159,7 @@ class ModelCache(object):
            behaviour.
        """
        self.model_infos: Dict[str, ModelBase] = {}
+        self.app_config = app_config
        # allow lazy offloading only when vram cache enabled
        self.lazy_offloading = lazy_offloading and max_vram_cache_size > 0
        self.precision: torch.dtype = precision
@@ -239,6 +264,9 @@ class ModelCache(object):
            snapshot_before = self._capture_memory_snapshot()
            with skip_torch_weight_init():
                model = model_info.get_model(child_type=submodel, torch_dtype=self.precision)
+            if SFAST_AVAILABLE and self.app_config.stable_fast and submodel:
+                model = self._compile_model(model, submodel)
+
            snapshot_after = self._capture_memory_snapshot()
            end_load_time = time.time()

@@ -322,6 +350,16 @@ class ModelCache(object):
                    f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
                )

+    def _compile_model(self, model: Any, model_type: SubModelType) -> Any:
+        if model_type == SubModelType("unet"):
+            self.logger.info("SFast-compiling unet model")
+            return compile_unet(model, SFAST_CONFIG)
+        elif model_type == SubModelType("vae"):
+            self.logger.info("SFast-compiling vae model")
+            return compile_vae(model, SFAST_CONFIG)
+        else:
+            return model
+
    class ModelLocker(object):
        def __init__(self, cache, key, model, gpu_load, size_needed):
            """
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -344,6 +344,7 @@ class ModelManager(object):
        self.app_config = InvokeAIAppConfig.get_config()
        self.logger = logger
        self.cache = ModelCache(
+            app_config=self.app_config,
            max_cache_size=max_cache_size,
            max_vram_cache_size=self.app_config.vram_cache_size,
            lazy_offloading=self.app_config.lazy_offload,
--- a/invokeai/frontend/web/public/locales/zh_CN.json
+++ b/invokeai/frontend/web/public/locales/zh_CN.json
@@ -1119,7 +1119,10 @@
        "deletedInvalidEdge": "已删除无效的边缘 {{source}} -> {{target}}",
        "unknownInput": "未知输入：{{name}}",
        "prototypeDesc": "此调用是一个原型 (prototype)。它可能会在本项目更新期间发生破坏性更改，并且随时可能被删除。",
-        "betaDesc": "此调用尚处于测试阶段。在稳定之前，它可能会在项目更新期间发生破坏性更改。本项目计划长期支持这种调用。"
+        "betaDesc": "此调用尚处于测试阶段。在稳定之前，它可能会在项目更新期间发生破坏性更改。本项目计划长期支持这种调用。",
+        "newWorkflow": "新建工作流",
+        "newWorkflowDesc": "是否创建一个新的工作流？",
+        "newWorkflowDesc2": "当前工作流有未保存的更改。"
    },
    "controlnet": {
        "resize": "直接缩放",
@@ -1635,7 +1638,7 @@
        "openWorkflow": "打开工作流",
        "clearWorkflowSearchFilter": "清除工作流检索过滤器",
        "workflowLibrary": "工作流库",
-        "downloadWorkflow": "下载工作流",
+        "downloadWorkflow": "保存到文件",
        "noRecentWorkflows": "无最近工作流",
        "workflowSaved": "已保存工作流",
        "workflowIsOpen": "工作流已打开",
@@ -1648,8 +1651,9 @@
        "deleteWorkflow": "删除工作流",
        "workflows": "工作流",
        "noDescription": "无描述",
-        "uploadWorkflow": "上传工作流",
-        "userWorkflows": "我的工作流"
+        "uploadWorkflow": "从文件中加载",
+        "userWorkflows": "我的工作流",
+        "newWorkflowCreated": "已创建新的工作流"
    },
    "app": {
        "storeNotInitialized": "商店尚未初始化"
--- a/invokeai/frontend/web/src/app/store/store.ts
+++ b/invokeai/frontend/web/src/app/store/store.ts
@@ -34,6 +34,7 @@ import { actionSanitizer } from './middleware/devtools/actionSanitizer';
 import { actionsDenylist } from './middleware/devtools/actionsDenylist';
 import { stateSanitizer } from './middleware/devtools/stateSanitizer';
 import { listenerMiddleware } from './middleware/listenerMiddleware';
+import { authToastMiddleware } from 'services/api/authToastMiddleware';

 const allReducers = {
  canvas: canvasReducer,
@@ -96,6 +97,7 @@ export const createStore = (uniqueStoreKey?: string, persist = true) =>
      })
        .concat(api.middleware)
        .concat(dynamicMiddlewares)
+        .concat(authToastMiddleware)
        .prepend(listenerMiddleware.middleware),
    enhancers: (getDefaultEnhancers) => {
      const _enhancers = getDefaultEnhancers().concat(autoBatchEnhancer());
--- a/invokeai/frontend/web/src/services/api/authToastMiddleware.ts
+++ b/invokeai/frontend/web/src/services/api/authToastMiddleware.ts
@@ -5,12 +5,10 @@ import { t } from 'i18next';
 import { z } from 'zod';

 const zRejectedForbiddenAction = z.object({
-  action: z.object({
-    payload: z.object({
-      status: z.literal(403),
-      data: z.object({
-        detail: z.string(),
-      }),
+  payload: z.object({
+    status: z.literal(403),
+    data: z.object({
+      detail: z.string(),
    }),
  }),
 });
@@ -22,8 +20,8 @@ export const authToastMiddleware: Middleware =
        const parsed = zRejectedForbiddenAction.parse(action);
        const { dispatch } = api;
        const customMessage =
-          parsed.action.payload.data.detail !== 'Forbidden'
-            ? parsed.action.payload.data.detail
+          parsed.payload.data.detail !== 'Forbidden'
+            ? parsed.payload.data.detail
            : undefined;
        dispatch(
          addToast({
@@ -32,7 +30,7 @@ export const authToastMiddleware: Middleware =
            description: customMessage,
          })
        );
-      } catch {
+      } catch (error) {
        // no-op
      }
    }
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -113,6 +113,7 @@ dependencies = [
 "onnx" = ["onnxruntime"]
 "onnx-cuda" = ["onnxruntime-gpu"]
 "onnx-directml" = ["onnxruntime-directml"]
+"stable-fast" = ["stable-fast"]

 [project.scripts]
Author	SHA1	Message	Date
Lincoln Stein	4b9a46e4c2	make ip_adapters work with stable-fast	2023-12-21 17:29:28 -05:00
Lincoln Stein	952b12abb7	resolve conflicts	2023-12-21 16:31:42 -05:00
Lincoln Stein	2ff41afe8c	ruff fixes	2023-12-21 16:29:32 -05:00
Lincoln Stein	e22df59239	proof-of-principle support for stable-fast only compile model the first time :-) probe for availability of stable-fast compiler and triton at startup time simplify config logic	2023-12-21 16:28:42 -05:00
Lincoln Stein	e3ab074b95	probe for availability of stable-fast compiler and triton at startup time	2023-12-21 16:10:52 -05:00
Lincoln Stein	6cb3031c09	only compile model the first time :-)	2023-12-20 22:40:56 -05:00
Lincoln Stein	9c1d250665	hacked in stable-fast; can generate one image before crashing	2023-12-20 22:11:16 -05:00
Surisen	8d2952695d	translationBot(ui): update translation (Chinese (Simplified)) Currently translated at 99.8% (1363 of 1365 strings) Co-authored-by: Surisen <zhonghx0804@outlook.com> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/zh_Hans/ Translation: InvokeAI/Web UI	2023-12-21 09:56:06 +11:00
Mary Hipp	562fb1f3a1	add authToastMiddleware back and fix parsing	2023-12-20 14:59:33 -05:00