mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2026-02-07 06:54:58 -05:00
Compare commits
41 Commits
v5.0.0.a7
...
lstein/fea
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9dcace7d82 | ||
|
|
02957be333 | ||
|
|
5d6a77d336 | ||
|
|
9b7b182cf7 | ||
|
|
2219e3643a | ||
|
|
6932f27b43 | ||
|
|
0df018bd4e | ||
|
|
7088d5610b | ||
|
|
589a7959c0 | ||
|
|
e26360f85b | ||
|
|
debef2476e | ||
|
|
e57809e1c6 | ||
|
|
1c0067f931 | ||
|
|
c3d1252892 | ||
|
|
84f5cbdd97 | ||
|
|
edac01d4fb | ||
|
|
d04c880cce | ||
|
|
763a2e2632 | ||
|
|
eaadc55c7d | ||
|
|
89f8326c0b | ||
|
|
99558de178 | ||
|
|
77130f108d | ||
|
|
371f5bc782 | ||
|
|
fb9b7fb63a | ||
|
|
bd833900a3 | ||
|
|
a84f3058e2 | ||
|
|
f7436f3bae | ||
|
|
7dd93cb810 | ||
|
|
9adb15f86c | ||
|
|
3d69372785 | ||
|
|
eca29c41d0 | ||
|
|
9df0980c46 | ||
|
|
cef51ad80d | ||
|
|
83356ec74c | ||
|
|
9336a076de | ||
|
|
32d3e4dc5c | ||
|
|
a1dcab9c38 | ||
|
|
bd9b00a6bf | ||
|
|
eaa2c68693 | ||
|
|
24d73280ee | ||
|
|
6b991a5269 |
@@ -1328,7 +1328,7 @@ from invokeai.app.services.model_load import ModelLoadService, ModelLoaderRegist
|
|||||||
|
|
||||||
config = InvokeAIAppConfig.get_config()
|
config = InvokeAIAppConfig.get_config()
|
||||||
ram_cache = ModelCache(
|
ram_cache = ModelCache(
|
||||||
max_cache_size=config.ram_cache_size, max_vram_cache_size=config.vram_cache_size, logger=logger
|
max_cache_size=config.ram_cache_size, logger=logger
|
||||||
)
|
)
|
||||||
convert_cache = ModelConvertCache(
|
convert_cache = ModelConvertCache(
|
||||||
cache_path=config.models_convert_cache_path, max_size=config.convert_cache_size
|
cache_path=config.models_convert_cache_path, max_size=config.convert_cache_size
|
||||||
|
|||||||
@@ -103,6 +103,7 @@ class CompelInvocation(BaseInvocation):
|
|||||||
textual_inversion_manager=ti_manager,
|
textual_inversion_manager=ti_manager,
|
||||||
dtype_for_device_getter=TorchDevice.choose_torch_dtype,
|
dtype_for_device_getter=TorchDevice.choose_torch_dtype,
|
||||||
truncate_long_prompts=False,
|
truncate_long_prompts=False,
|
||||||
|
device=TorchDevice.choose_torch_device(),
|
||||||
)
|
)
|
||||||
|
|
||||||
conjunction = Compel.parse_prompt_string(self.prompt)
|
conjunction = Compel.parse_prompt_string(self.prompt)
|
||||||
@@ -117,6 +118,7 @@ class CompelInvocation(BaseInvocation):
|
|||||||
conditioning_data = ConditioningFieldData(conditionings=[BasicConditioningInfo(embeds=c)])
|
conditioning_data = ConditioningFieldData(conditionings=[BasicConditioningInfo(embeds=c)])
|
||||||
|
|
||||||
conditioning_name = context.conditioning.save(conditioning_data)
|
conditioning_name = context.conditioning.save(conditioning_data)
|
||||||
|
|
||||||
return ConditioningOutput(
|
return ConditioningOutput(
|
||||||
conditioning=ConditioningField(
|
conditioning=ConditioningField(
|
||||||
conditioning_name=conditioning_name,
|
conditioning_name=conditioning_name,
|
||||||
@@ -203,6 +205,7 @@ class SDXLPromptInvocationBase:
|
|||||||
truncate_long_prompts=False, # TODO:
|
truncate_long_prompts=False, # TODO:
|
||||||
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, # TODO: clip skip
|
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, # TODO: clip skip
|
||||||
requires_pooled=get_pooled,
|
requires_pooled=get_pooled,
|
||||||
|
device=TorchDevice.choose_torch_device(),
|
||||||
)
|
)
|
||||||
|
|
||||||
conjunction = Compel.parse_prompt_string(prompt)
|
conjunction = Compel.parse_prompt_string(prompt)
|
||||||
@@ -313,7 +316,6 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
conditioning_name = context.conditioning.save(conditioning_data)
|
conditioning_name = context.conditioning.save(conditioning_data)
|
||||||
|
|
||||||
return ConditioningOutput(
|
return ConditioningOutput(
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
|
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
|
||||||
|
import copy
|
||||||
import inspect
|
import inspect
|
||||||
from contextlib import ExitStack
|
from contextlib import ExitStack
|
||||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
||||||
@@ -193,9 +194,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
|
|||||||
text_embeddings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]] = []
|
text_embeddings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]] = []
|
||||||
text_embeddings_masks: list[Optional[torch.Tensor]] = []
|
text_embeddings_masks: list[Optional[torch.Tensor]] = []
|
||||||
for cond in cond_list:
|
for cond in cond_list:
|
||||||
cond_data = context.conditioning.load(cond.conditioning_name)
|
cond_data = copy.deepcopy(context.conditioning.load(cond.conditioning_name))
|
||||||
text_embeddings.append(cond_data.conditionings[0].to(device=device, dtype=dtype))
|
text_embeddings.append(cond_data.conditionings[0].to(device=device, dtype=dtype))
|
||||||
|
|
||||||
mask = cond.mask
|
mask = cond.mask
|
||||||
if mask is not None:
|
if mask is not None:
|
||||||
mask = context.tensors.load(mask.tensor_name)
|
mask = context.tensors.load(mask.tensor_name)
|
||||||
@@ -226,6 +226,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
|
|||||||
# Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
|
# Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
|
||||||
mask = mask.unsqueeze(0) # Shape: (1, h, w) -> (1, 1, h, w)
|
mask = mask.unsqueeze(0) # Shape: (1, h, w) -> (1, 1, h, w)
|
||||||
resized_mask = tf(mask)
|
resized_mask = tf(mask)
|
||||||
|
assert isinstance(resized_mask, torch.Tensor)
|
||||||
return resized_mask
|
return resized_mask
|
||||||
|
|
||||||
def _concat_regional_text_embeddings(
|
def _concat_regional_text_embeddings(
|
||||||
|
|||||||
@@ -26,13 +26,13 @@ LEGACY_INIT_FILE = Path("invokeai.init")
|
|||||||
DEFAULT_RAM_CACHE = 10.0
|
DEFAULT_RAM_CACHE = 10.0
|
||||||
DEFAULT_VRAM_CACHE = 0.25
|
DEFAULT_VRAM_CACHE = 0.25
|
||||||
DEFAULT_CONVERT_CACHE = 20.0
|
DEFAULT_CONVERT_CACHE = 20.0
|
||||||
DEVICE = Literal["auto", "cpu", "cuda", "cuda:1", "mps"]
|
DEVICE = Literal["auto", "cpu", "cuda:0", "cuda:1", "cuda:2", "cuda:3", "cuda:4", "cuda:5", "cuda:6", "cuda:7", "mps"]
|
||||||
PRECISION = Literal["auto", "float16", "bfloat16", "float32"]
|
PRECISION = Literal["auto", "float16", "bfloat16", "float32", "autocast"]
|
||||||
ATTENTION_TYPE = Literal["auto", "normal", "xformers", "sliced", "torch-sdp"]
|
ATTENTION_TYPE = Literal["auto", "normal", "xformers", "sliced", "torch-sdp"]
|
||||||
ATTENTION_SLICE_SIZE = Literal["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8]
|
ATTENTION_SLICE_SIZE = Literal["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8]
|
||||||
LOG_FORMAT = Literal["plain", "color", "syslog", "legacy"]
|
LOG_FORMAT = Literal["plain", "color", "syslog", "legacy"]
|
||||||
LOG_LEVEL = Literal["debug", "info", "warning", "error", "critical"]
|
LOG_LEVEL = Literal["debug", "info", "warning", "error", "critical"]
|
||||||
CONFIG_SCHEMA_VERSION = "4.0.1"
|
CONFIG_SCHEMA_VERSION = "4.0.2"
|
||||||
|
|
||||||
|
|
||||||
def get_default_ram_cache_size() -> float:
|
def get_default_ram_cache_size() -> float:
|
||||||
@@ -105,14 +105,16 @@ class InvokeAIAppConfig(BaseSettings):
|
|||||||
convert_cache: Maximum size of on-disk converted models cache (GB).
|
convert_cache: Maximum size of on-disk converted models cache (GB).
|
||||||
lazy_offload: Keep models in VRAM until their space is needed.
|
lazy_offload: Keep models in VRAM until their space is needed.
|
||||||
log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
|
log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
|
||||||
device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda`, `cuda:1`, `mps`
|
device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda:0`, `cuda:1`, `cuda:2`, `cuda:3`, `cuda:4`, `cuda:5`, `cuda:6`, `cuda:7`, `mps`
|
||||||
precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`
|
devices: List of execution devices; will override default device selected.
|
||||||
|
precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`, `autocast`
|
||||||
sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements.
|
sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements.
|
||||||
attention_type: Attention type.<br>Valid values: `auto`, `normal`, `xformers`, `sliced`, `torch-sdp`
|
attention_type: Attention type.<br>Valid values: `auto`, `normal`, `xformers`, `sliced`, `torch-sdp`
|
||||||
attention_slice_size: Slice size, valid when attention_type=="sliced".<br>Valid values: `auto`, `balanced`, `max`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`
|
attention_slice_size: Slice size, valid when attention_type=="sliced".<br>Valid values: `auto`, `balanced`, `max`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`
|
||||||
force_tiled_decode: Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty).
|
force_tiled_decode: Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty).
|
||||||
pil_compress_level: The compress_level setting of PIL.Image.save(), used for PNG encoding. All settings are lossless. 0 = no compression, 1 = fastest with slightly larger filesize, 9 = slowest with smallest filesize. 1 is typically the best setting.
|
pil_compress_level: The compress_level setting of PIL.Image.save(), used for PNG encoding. All settings are lossless. 0 = no compression, 1 = fastest with slightly larger filesize, 9 = slowest with smallest filesize. 1 is typically the best setting.
|
||||||
max_queue_size: Maximum number of items in the session queue.
|
max_queue_size: Maximum number of items in the session queue.
|
||||||
|
max_threads: Maximum number of session queue execution threads. Autocalculated from number of GPUs if not set.
|
||||||
clear_queue_on_startup: Empties session queue on startup.
|
clear_queue_on_startup: Empties session queue on startup.
|
||||||
allow_nodes: List of nodes to allow. Omit to allow all.
|
allow_nodes: List of nodes to allow. Omit to allow all.
|
||||||
deny_nodes: List of nodes to deny. Omit to deny none.
|
deny_nodes: List of nodes to deny. Omit to deny none.
|
||||||
@@ -178,6 +180,7 @@ class InvokeAIAppConfig(BaseSettings):
|
|||||||
|
|
||||||
# DEVICE
|
# DEVICE
|
||||||
device: DEVICE = Field(default="auto", description="Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.")
|
device: DEVICE = Field(default="auto", description="Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.")
|
||||||
|
devices: Optional[list[DEVICE]] = Field(default=None, description="List of execution devices; will override default device selected.")
|
||||||
precision: PRECISION = Field(default="auto", description="Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.")
|
precision: PRECISION = Field(default="auto", description="Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.")
|
||||||
|
|
||||||
# GENERATION
|
# GENERATION
|
||||||
@@ -187,6 +190,7 @@ class InvokeAIAppConfig(BaseSettings):
|
|||||||
force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty).")
|
force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty).")
|
||||||
pil_compress_level: int = Field(default=1, description="The compress_level setting of PIL.Image.save(), used for PNG encoding. All settings are lossless. 0 = no compression, 1 = fastest with slightly larger filesize, 9 = slowest with smallest filesize. 1 is typically the best setting.")
|
pil_compress_level: int = Field(default=1, description="The compress_level setting of PIL.Image.save(), used for PNG encoding. All settings are lossless. 0 = no compression, 1 = fastest with slightly larger filesize, 9 = slowest with smallest filesize. 1 is typically the best setting.")
|
||||||
max_queue_size: int = Field(default=10000, gt=0, description="Maximum number of items in the session queue.")
|
max_queue_size: int = Field(default=10000, gt=0, description="Maximum number of items in the session queue.")
|
||||||
|
max_threads: Optional[int] = Field(default=None, description="Maximum number of session queue execution threads. Autocalculated from number of GPUs if not set.")
|
||||||
clear_queue_on_startup: bool = Field(default=False, description="Empties session queue on startup.")
|
clear_queue_on_startup: bool = Field(default=False, description="Empties session queue on startup.")
|
||||||
|
|
||||||
# NODES
|
# NODES
|
||||||
@@ -376,9 +380,6 @@ def migrate_v3_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
|
|||||||
# `max_cache_size` was renamed to `ram` some time in v3, but both names were used
|
# `max_cache_size` was renamed to `ram` some time in v3, but both names were used
|
||||||
if k == "max_cache_size" and "ram" not in category_dict:
|
if k == "max_cache_size" and "ram" not in category_dict:
|
||||||
parsed_config_dict["ram"] = v
|
parsed_config_dict["ram"] = v
|
||||||
# `max_vram_cache_size` was renamed to `vram` some time in v3, but both names were used
|
|
||||||
if k == "max_vram_cache_size" and "vram" not in category_dict:
|
|
||||||
parsed_config_dict["vram"] = v
|
|
||||||
# autocast was removed in v4.0.1
|
# autocast was removed in v4.0.1
|
||||||
if k == "precision" and v == "autocast":
|
if k == "precision" and v == "autocast":
|
||||||
parsed_config_dict["precision"] = "auto"
|
parsed_config_dict["precision"] = "auto"
|
||||||
@@ -426,6 +427,27 @@ def migrate_v4_0_0_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig
|
|||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
def migrate_v4_0_1_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
|
||||||
|
"""Migrate v4.0.1 config dictionary to a current config object.
|
||||||
|
|
||||||
|
A few new multi-GPU options were added in 4.0.2, and this simply
|
||||||
|
updates the schema label.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config_dict: A dictionary of settings from a v4.0.1 config file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
An instance of `InvokeAIAppConfig` with the migrated settings.
|
||||||
|
"""
|
||||||
|
parsed_config_dict: dict[str, Any] = {}
|
||||||
|
for k, _ in config_dict.items():
|
||||||
|
if k == "schema_version":
|
||||||
|
parsed_config_dict[k] = CONFIG_SCHEMA_VERSION
|
||||||
|
config = DefaultInvokeAIAppConfig.model_validate(parsed_config_dict)
|
||||||
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
# TO DO: replace this with a formal registration and migration system
|
||||||
def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
|
def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
|
||||||
"""Load and migrate a config file to the latest version.
|
"""Load and migrate a config file to the latest version.
|
||||||
|
|
||||||
@@ -457,6 +479,10 @@ def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
|
|||||||
loaded_config_dict = migrate_v4_0_0_config_dict(loaded_config_dict)
|
loaded_config_dict = migrate_v4_0_0_config_dict(loaded_config_dict)
|
||||||
loaded_config_dict.write_file(config_path)
|
loaded_config_dict.write_file(config_path)
|
||||||
|
|
||||||
|
elif loaded_config_dict["schema_version"] == "4.0.1":
|
||||||
|
loaded_config_dict = migrate_v4_0_1_config_dict(loaded_config_dict)
|
||||||
|
loaded_config_dict.write_file(config_path)
|
||||||
|
|
||||||
# Attempt to load as a v4 config file
|
# Attempt to load as a v4 config file
|
||||||
try:
|
try:
|
||||||
# Meta is not included in the model fields, so we need to validate it separately
|
# Meta is not included in the model fields, so we need to validate it separately
|
||||||
|
|||||||
@@ -53,11 +53,11 @@ class InvocationServices:
|
|||||||
model_images: "ModelImageFileStorageBase",
|
model_images: "ModelImageFileStorageBase",
|
||||||
model_manager: "ModelManagerServiceBase",
|
model_manager: "ModelManagerServiceBase",
|
||||||
download_queue: "DownloadQueueServiceBase",
|
download_queue: "DownloadQueueServiceBase",
|
||||||
performance_statistics: "InvocationStatsServiceBase",
|
|
||||||
session_queue: "SessionQueueBase",
|
session_queue: "SessionQueueBase",
|
||||||
session_processor: "SessionProcessorBase",
|
session_processor: "SessionProcessorBase",
|
||||||
invocation_cache: "InvocationCacheBase",
|
invocation_cache: "InvocationCacheBase",
|
||||||
names: "NameServiceBase",
|
names: "NameServiceBase",
|
||||||
|
performance_statistics: "InvocationStatsServiceBase",
|
||||||
urls: "UrlServiceBase",
|
urls: "UrlServiceBase",
|
||||||
workflow_records: "WorkflowRecordsStorageBase",
|
workflow_records: "WorkflowRecordsStorageBase",
|
||||||
tensors: "ObjectSerializerBase[torch.Tensor]",
|
tensors: "ObjectSerializerBase[torch.Tensor]",
|
||||||
@@ -77,11 +77,11 @@ class InvocationServices:
|
|||||||
self.model_images = model_images
|
self.model_images = model_images
|
||||||
self.model_manager = model_manager
|
self.model_manager = model_manager
|
||||||
self.download_queue = download_queue
|
self.download_queue = download_queue
|
||||||
self.performance_statistics = performance_statistics
|
|
||||||
self.session_queue = session_queue
|
self.session_queue = session_queue
|
||||||
self.session_processor = session_processor
|
self.session_processor = session_processor
|
||||||
self.invocation_cache = invocation_cache
|
self.invocation_cache = invocation_cache
|
||||||
self.names = names
|
self.names = names
|
||||||
|
self.performance_statistics = performance_statistics
|
||||||
self.urls = urls
|
self.urls = urls
|
||||||
self.workflow_records = workflow_records
|
self.workflow_records = workflow_records
|
||||||
self.tensors = tensors
|
self.tensors = tensors
|
||||||
|
|||||||
@@ -74,9 +74,9 @@ class InvocationStatsService(InvocationStatsServiceBase):
|
|||||||
)
|
)
|
||||||
self._stats[graph_execution_state_id].add_node_execution_stats(node_stats)
|
self._stats[graph_execution_state_id].add_node_execution_stats(node_stats)
|
||||||
|
|
||||||
def reset_stats(self):
|
def reset_stats(self, graph_execution_state_id: str):
|
||||||
self._stats = {}
|
self._stats.pop(graph_execution_state_id)
|
||||||
self._cache_stats = {}
|
self._cache_stats.pop(graph_execution_state_id)
|
||||||
|
|
||||||
def get_stats(self, graph_execution_state_id: str) -> InvocationStatsSummary:
|
def get_stats(self, graph_execution_state_id: str) -> InvocationStatsSummary:
|
||||||
graph_stats_summary = self._get_graph_summary(graph_execution_state_id)
|
graph_stats_summary = self._get_graph_summary(graph_execution_state_id)
|
||||||
|
|||||||
@@ -284,9 +284,14 @@ class ModelInstallService(ModelInstallServiceBase):
|
|||||||
unfinished_jobs = [x for x in self._install_jobs if not x.in_terminal_state]
|
unfinished_jobs = [x for x in self._install_jobs if not x.in_terminal_state]
|
||||||
self._install_jobs = unfinished_jobs
|
self._install_jobs = unfinished_jobs
|
||||||
|
|
||||||
def _migrate_yaml(self) -> None:
|
def _migrate_yaml(self, rename_yaml: Optional[bool] = True, overwrite_db: Optional[bool] = False) -> None:
|
||||||
db_models = self.record_store.all_models()
|
db_models = self.record_store.all_models()
|
||||||
|
|
||||||
|
if overwrite_db:
|
||||||
|
for model in db_models:
|
||||||
|
self.record_store.del_model(model.key)
|
||||||
|
db_models = self.record_store.all_models()
|
||||||
|
|
||||||
legacy_models_yaml_path = (
|
legacy_models_yaml_path = (
|
||||||
self._app_config.legacy_models_yaml_path or self._app_config.root_path / "configs" / "models.yaml"
|
self._app_config.legacy_models_yaml_path or self._app_config.root_path / "configs" / "models.yaml"
|
||||||
)
|
)
|
||||||
@@ -336,7 +341,8 @@ class ModelInstallService(ModelInstallServiceBase):
|
|||||||
self._logger.warning(f"Model at {model_path} could not be migrated: {e}")
|
self._logger.warning(f"Model at {model_path} could not be migrated: {e}")
|
||||||
|
|
||||||
# Rename `models.yaml` to `models.yaml.bak` to prevent re-migration
|
# Rename `models.yaml` to `models.yaml.bak` to prevent re-migration
|
||||||
legacy_models_yaml_path.rename(legacy_models_yaml_path.with_suffix(".yaml.bak"))
|
if rename_yaml:
|
||||||
|
legacy_models_yaml_path.rename(legacy_models_yaml_path.with_suffix(".yaml.bak"))
|
||||||
|
|
||||||
# Unset the path - we are done with it either way
|
# Unset the path - we are done with it either way
|
||||||
self._app_config.legacy_models_yaml_path = None
|
self._app_config.legacy_models_yaml_path = None
|
||||||
|
|||||||
@@ -33,6 +33,11 @@ class ModelLoadServiceBase(ABC):
|
|||||||
def convert_cache(self) -> ModelConvertCacheBase:
|
def convert_cache(self) -> ModelConvertCacheBase:
|
||||||
"""Return the checkpoint convert cache used by this loader."""
|
"""Return the checkpoint convert cache used by this loader."""
|
||||||
|
|
||||||
|
@property
|
||||||
|
@abstractmethod
|
||||||
|
def gpu_count(self) -> int:
|
||||||
|
"""Return the number of GPUs we are configured to use."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def load_model_from_path(
|
def load_model_from_path(
|
||||||
self, model_path: Path, loader: Optional[Callable[[Path], AnyModel]] = None
|
self, model_path: Path, loader: Optional[Callable[[Path], AnyModel]] = None
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ class ModelLoadService(ModelLoadServiceBase):
|
|||||||
self._registry = registry
|
self._registry = registry
|
||||||
|
|
||||||
def start(self, invoker: Invoker) -> None:
|
def start(self, invoker: Invoker) -> None:
|
||||||
|
"""Start the service."""
|
||||||
self._invoker = invoker
|
self._invoker = invoker
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -53,6 +54,11 @@ class ModelLoadService(ModelLoadServiceBase):
|
|||||||
"""Return the RAM cache used by this loader."""
|
"""Return the RAM cache used by this loader."""
|
||||||
return self._ram_cache
|
return self._ram_cache
|
||||||
|
|
||||||
|
@property
|
||||||
|
def gpu_count(self) -> int:
|
||||||
|
"""Return the number of GPUs available for our uses."""
|
||||||
|
return len(self._ram_cache.execution_devices)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def convert_cache(self) -> ModelConvertCacheBase:
|
def convert_cache(self) -> ModelConvertCacheBase:
|
||||||
"""Return the checkpoint convert cache used by this loader."""
|
"""Return the checkpoint convert cache used by this loader."""
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
|
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional, Set
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
@@ -31,7 +32,7 @@ class ModelManagerServiceBase(ABC):
|
|||||||
model_record_service: ModelRecordServiceBase,
|
model_record_service: ModelRecordServiceBase,
|
||||||
download_queue: DownloadQueueServiceBase,
|
download_queue: DownloadQueueServiceBase,
|
||||||
events: EventServiceBase,
|
events: EventServiceBase,
|
||||||
execution_device: torch.device,
|
execution_devices: Optional[Set[torch.device]] = None,
|
||||||
) -> Self:
|
) -> Self:
|
||||||
"""
|
"""
|
||||||
Construct the model manager service instance.
|
Construct the model manager service instance.
|
||||||
|
|||||||
@@ -1,14 +1,10 @@
|
|||||||
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
|
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
|
||||||
"""Implementation of ModelManagerServiceBase."""
|
"""Implementation of ModelManagerServiceBase."""
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import torch
|
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
|
|
||||||
from invokeai.app.services.invoker import Invoker
|
from invokeai.app.services.invoker import Invoker
|
||||||
from invokeai.backend.model_manager.load import ModelCache, ModelConvertCache, ModelLoaderRegistry
|
from invokeai.backend.model_manager.load import ModelCache, ModelConvertCache, ModelLoaderRegistry
|
||||||
from invokeai.backend.util.devices import TorchDevice
|
|
||||||
from invokeai.backend.util.logging import InvokeAILogger
|
from invokeai.backend.util.logging import InvokeAILogger
|
||||||
|
|
||||||
from ..config import InvokeAIAppConfig
|
from ..config import InvokeAIAppConfig
|
||||||
@@ -69,7 +65,6 @@ class ModelManagerService(ModelManagerServiceBase):
|
|||||||
model_record_service: ModelRecordServiceBase,
|
model_record_service: ModelRecordServiceBase,
|
||||||
download_queue: DownloadQueueServiceBase,
|
download_queue: DownloadQueueServiceBase,
|
||||||
events: EventServiceBase,
|
events: EventServiceBase,
|
||||||
execution_device: Optional[torch.device] = None,
|
|
||||||
) -> Self:
|
) -> Self:
|
||||||
"""
|
"""
|
||||||
Construct the model manager service instance.
|
Construct the model manager service instance.
|
||||||
@@ -82,9 +77,7 @@ class ModelManagerService(ModelManagerServiceBase):
|
|||||||
ram_cache = ModelCache(
|
ram_cache = ModelCache(
|
||||||
max_cache_size=app_config.ram,
|
max_cache_size=app_config.ram,
|
||||||
max_vram_cache_size=app_config.vram,
|
max_vram_cache_size=app_config.vram,
|
||||||
lazy_offloading=app_config.lazy_offload,
|
|
||||||
logger=logger,
|
logger=logger,
|
||||||
execution_device=execution_device or TorchDevice.choose_torch_device(),
|
|
||||||
)
|
)
|
||||||
convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
|
convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
|
||||||
loader = ModelLoadService(
|
loader = ModelLoadService(
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import threading
|
||||||
import typing
|
import typing
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Optional, TypeVar
|
from typing import TYPE_CHECKING, Optional, TypeVar
|
||||||
@@ -9,6 +10,7 @@ import torch
|
|||||||
from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase
|
from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase
|
||||||
from invokeai.app.services.object_serializer.object_serializer_common import ObjectNotFoundError
|
from invokeai.app.services.object_serializer.object_serializer_common import ObjectNotFoundError
|
||||||
from invokeai.app.util.misc import uuid_string
|
from invokeai.app.util.misc import uuid_string
|
||||||
|
from invokeai.backend.util.devices import TorchDevice
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from invokeai.app.services.invoker import Invoker
|
from invokeai.app.services.invoker import Invoker
|
||||||
@@ -70,7 +72,10 @@ class ObjectSerializerDisk(ObjectSerializerBase[T]):
|
|||||||
return self._output_dir / name
|
return self._output_dir / name
|
||||||
|
|
||||||
def _new_name(self) -> str:
|
def _new_name(self) -> str:
|
||||||
return f"{self._obj_class_name}_{uuid_string()}"
|
tid = threading.current_thread().ident
|
||||||
|
# Add tid to the object name because uuid4 not thread-safe on windows
|
||||||
|
# See https://stackoverflow.com/questions/2759644/python-multiprocessing-doesnt-play-nicely-with-uuid-uuid4
|
||||||
|
return f"{self._obj_class_name}_{tid}-{uuid_string()}"
|
||||||
|
|
||||||
def _tempdir_cleanup(self) -> None:
|
def _tempdir_cleanup(self) -> None:
|
||||||
"""Calls `cleanup` on the temporary directory, if it exists."""
|
"""Calls `cleanup` on the temporary directory, if it exists."""
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import traceback
|
import traceback
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from threading import BoundedSemaphore, Thread
|
from queue import Queue
|
||||||
|
from threading import BoundedSemaphore, Lock, Thread
|
||||||
from threading import Event as ThreadEvent
|
from threading import Event as ThreadEvent
|
||||||
from typing import Optional
|
from typing import Optional, Set
|
||||||
|
|
||||||
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput
|
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput
|
||||||
from invokeai.app.services.events.events_common import (
|
from invokeai.app.services.events.events_common import (
|
||||||
@@ -26,6 +27,7 @@ from invokeai.app.services.session_queue.session_queue_common import SessionQueu
|
|||||||
from invokeai.app.services.shared.graph import NodeInputError
|
from invokeai.app.services.shared.graph import NodeInputError
|
||||||
from invokeai.app.services.shared.invocation_context import InvocationContextData, build_invocation_context
|
from invokeai.app.services.shared.invocation_context import InvocationContextData, build_invocation_context
|
||||||
from invokeai.app.util.profiler import Profiler
|
from invokeai.app.util.profiler import Profiler
|
||||||
|
from invokeai.backend.util.devices import TorchDevice
|
||||||
|
|
||||||
from ..invoker import Invoker
|
from ..invoker import Invoker
|
||||||
from .session_processor_base import InvocationServices, SessionProcessorBase, SessionRunnerBase
|
from .session_processor_base import InvocationServices, SessionProcessorBase, SessionRunnerBase
|
||||||
@@ -57,8 +59,11 @@ class DefaultSessionRunner(SessionRunnerBase):
|
|||||||
self._on_after_run_node_callbacks = on_after_run_node_callbacks or []
|
self._on_after_run_node_callbacks = on_after_run_node_callbacks or []
|
||||||
self._on_node_error_callbacks = on_node_error_callbacks or []
|
self._on_node_error_callbacks = on_node_error_callbacks or []
|
||||||
self._on_after_run_session_callbacks = on_after_run_session_callbacks or []
|
self._on_after_run_session_callbacks = on_after_run_session_callbacks or []
|
||||||
|
self._process_lock = Lock()
|
||||||
|
|
||||||
def start(self, services: InvocationServices, cancel_event: ThreadEvent, profiler: Optional[Profiler] = None):
|
def start(
|
||||||
|
self, services: InvocationServices, cancel_event: ThreadEvent, profiler: Optional[Profiler] = None
|
||||||
|
) -> None:
|
||||||
self._services = services
|
self._services = services
|
||||||
self._cancel_event = cancel_event
|
self._cancel_event = cancel_event
|
||||||
self._profiler = profiler
|
self._profiler = profiler
|
||||||
@@ -76,7 +81,8 @@ class DefaultSessionRunner(SessionRunnerBase):
|
|||||||
# Loop over invocations until the session is complete or canceled
|
# Loop over invocations until the session is complete or canceled
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
invocation = queue_item.session.next()
|
with self._process_lock:
|
||||||
|
invocation = queue_item.session.next()
|
||||||
# Anything other than a `NodeInputError` is handled as a processor error
|
# Anything other than a `NodeInputError` is handled as a processor error
|
||||||
except NodeInputError as e:
|
except NodeInputError as e:
|
||||||
error_type = e.__class__.__name__
|
error_type = e.__class__.__name__
|
||||||
@@ -108,7 +114,7 @@ class DefaultSessionRunner(SessionRunnerBase):
|
|||||||
|
|
||||||
self._on_after_run_session(queue_item=queue_item)
|
self._on_after_run_session(queue_item=queue_item)
|
||||||
|
|
||||||
def run_node(self, invocation: BaseInvocation, queue_item: SessionQueueItem):
|
def run_node(self, invocation: BaseInvocation, queue_item: SessionQueueItem) -> None:
|
||||||
try:
|
try:
|
||||||
# Any unhandled exception in this scope is an invocation error & will fail the graph
|
# Any unhandled exception in this scope is an invocation error & will fail the graph
|
||||||
with self._services.performance_statistics.collect_stats(invocation, queue_item.session_id):
|
with self._services.performance_statistics.collect_stats(invocation, queue_item.session_id):
|
||||||
@@ -210,7 +216,7 @@ class DefaultSessionRunner(SessionRunnerBase):
|
|||||||
# we don't care about that - suppress the error.
|
# we don't care about that - suppress the error.
|
||||||
with suppress(GESStatsNotFoundError):
|
with suppress(GESStatsNotFoundError):
|
||||||
self._services.performance_statistics.log_stats(queue_item.session.id)
|
self._services.performance_statistics.log_stats(queue_item.session.id)
|
||||||
self._services.performance_statistics.reset_stats()
|
self._services.performance_statistics.reset_stats(queue_item.session.id)
|
||||||
|
|
||||||
for callback in self._on_after_run_session_callbacks:
|
for callback in self._on_after_run_session_callbacks:
|
||||||
callback(queue_item=queue_item)
|
callback(queue_item=queue_item)
|
||||||
@@ -324,7 +330,7 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
|
|
||||||
def start(self, invoker: Invoker) -> None:
|
def start(self, invoker: Invoker) -> None:
|
||||||
self._invoker: Invoker = invoker
|
self._invoker: Invoker = invoker
|
||||||
self._queue_item: Optional[SessionQueueItem] = None
|
self._active_queue_items: Set[SessionQueueItem] = set()
|
||||||
self._invocation: Optional[BaseInvocation] = None
|
self._invocation: Optional[BaseInvocation] = None
|
||||||
|
|
||||||
self._resume_event = ThreadEvent()
|
self._resume_event = ThreadEvent()
|
||||||
@@ -350,7 +356,14 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self._worker_thread_count = self._invoker.services.configuration.max_threads or len(
|
||||||
|
TorchDevice.execution_devices()
|
||||||
|
)
|
||||||
|
|
||||||
|
self._session_worker_queue: Queue[SessionQueueItem] = Queue()
|
||||||
|
|
||||||
self.session_runner.start(services=invoker.services, cancel_event=self._cancel_event, profiler=self._profiler)
|
self.session_runner.start(services=invoker.services, cancel_event=self._cancel_event, profiler=self._profiler)
|
||||||
|
# Session processor - singlethreaded
|
||||||
self._thread = Thread(
|
self._thread = Thread(
|
||||||
name="session_processor",
|
name="session_processor",
|
||||||
target=self._process,
|
target=self._process,
|
||||||
@@ -363,6 +376,16 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
)
|
)
|
||||||
self._thread.start()
|
self._thread.start()
|
||||||
|
|
||||||
|
# Session processor workers - multithreaded
|
||||||
|
self._invoker.services.logger.debug(f"Starting {self._worker_thread_count} session processing threads.")
|
||||||
|
for _i in range(0, self._worker_thread_count):
|
||||||
|
worker = Thread(
|
||||||
|
name="session_worker",
|
||||||
|
target=self._process_next_session,
|
||||||
|
daemon=True,
|
||||||
|
)
|
||||||
|
worker.start()
|
||||||
|
|
||||||
def stop(self, *args, **kwargs) -> None:
|
def stop(self, *args, **kwargs) -> None:
|
||||||
self._stop_event.set()
|
self._stop_event.set()
|
||||||
|
|
||||||
@@ -370,7 +393,7 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
self._poll_now_event.set()
|
self._poll_now_event.set()
|
||||||
|
|
||||||
async def _on_queue_cleared(self, event: FastAPIEvent[QueueClearedEvent]) -> None:
|
async def _on_queue_cleared(self, event: FastAPIEvent[QueueClearedEvent]) -> None:
|
||||||
if self._queue_item and self._queue_item.queue_id == event[1].queue_id:
|
if any(item.queue_id == event[1].queue_id for item in self._active_queue_items):
|
||||||
self._cancel_event.set()
|
self._cancel_event.set()
|
||||||
self._poll_now()
|
self._poll_now()
|
||||||
|
|
||||||
@@ -378,7 +401,7 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
self._poll_now()
|
self._poll_now()
|
||||||
|
|
||||||
async def _on_queue_item_status_changed(self, event: FastAPIEvent[QueueItemStatusChangedEvent]) -> None:
|
async def _on_queue_item_status_changed(self, event: FastAPIEvent[QueueItemStatusChangedEvent]) -> None:
|
||||||
if self._queue_item and event[1].status in ["completed", "failed", "canceled"]:
|
if self._active_queue_items and event[1].status in ["completed", "failed", "canceled"]:
|
||||||
# When the queue item is canceled via HTTP, the queue item status is set to `"canceled"` and this event is
|
# When the queue item is canceled via HTTP, the queue item status is set to `"canceled"` and this event is
|
||||||
# emitted. We need to respond to this event and stop graph execution. This is done by setting the cancel
|
# emitted. We need to respond to this event and stop graph execution. This is done by setting the cancel
|
||||||
# event, which the session runner checks between invocations. If set, the session runner loop is broken.
|
# event, which the session runner checks between invocations. If set, the session runner loop is broken.
|
||||||
@@ -403,7 +426,7 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
def get_status(self) -> SessionProcessorStatus:
|
def get_status(self) -> SessionProcessorStatus:
|
||||||
return SessionProcessorStatus(
|
return SessionProcessorStatus(
|
||||||
is_started=self._resume_event.is_set(),
|
is_started=self._resume_event.is_set(),
|
||||||
is_processing=self._queue_item is not None,
|
is_processing=len(self._active_queue_items) > 0,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _process(
|
def _process(
|
||||||
@@ -428,30 +451,22 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
resume_event.wait()
|
resume_event.wait()
|
||||||
|
|
||||||
# Get the next session to process
|
# Get the next session to process
|
||||||
self._queue_item = self._invoker.services.session_queue.dequeue()
|
queue_item = self._invoker.services.session_queue.dequeue()
|
||||||
|
|
||||||
if self._queue_item is None:
|
if queue_item is None:
|
||||||
# The queue was empty, wait for next polling interval or event to try again
|
# The queue was empty, wait for next polling interval or event to try again
|
||||||
self._invoker.services.logger.debug("Waiting for next polling interval or event")
|
self._invoker.services.logger.debug("Waiting for next polling interval or event")
|
||||||
poll_now_event.wait(self._polling_interval)
|
poll_now_event.wait(self._polling_interval)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._invoker.services.logger.debug(f"Executing queue item {self._queue_item.item_id}")
|
self._session_worker_queue.put(queue_item)
|
||||||
|
self._invoker.services.logger.debug(f"Scheduling queue item {queue_item.item_id} to run")
|
||||||
cancel_event.clear()
|
cancel_event.clear()
|
||||||
|
|
||||||
# Run the graph
|
# Run the graph
|
||||||
self.session_runner.run(queue_item=self._queue_item)
|
# self.session_runner.run(queue_item=self._queue_item)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception:
|
||||||
error_type = e.__class__.__name__
|
|
||||||
error_message = str(e)
|
|
||||||
error_traceback = traceback.format_exc()
|
|
||||||
self._on_non_fatal_processor_error(
|
|
||||||
queue_item=self._queue_item,
|
|
||||||
error_type=error_type,
|
|
||||||
error_message=error_message,
|
|
||||||
error_traceback=error_traceback,
|
|
||||||
)
|
|
||||||
# Wait for next polling interval or event to try again
|
# Wait for next polling interval or event to try again
|
||||||
poll_now_event.wait(self._polling_interval)
|
poll_now_event.wait(self._polling_interval)
|
||||||
continue
|
continue
|
||||||
@@ -466,9 +481,25 @@ class DefaultSessionProcessor(SessionProcessorBase):
|
|||||||
finally:
|
finally:
|
||||||
stop_event.clear()
|
stop_event.clear()
|
||||||
poll_now_event.clear()
|
poll_now_event.clear()
|
||||||
self._queue_item = None
|
|
||||||
self._thread_semaphore.release()
|
self._thread_semaphore.release()
|
||||||
|
|
||||||
|
def _process_next_session(self) -> None:
|
||||||
|
while True:
|
||||||
|
self._resume_event.wait()
|
||||||
|
queue_item = self._session_worker_queue.get()
|
||||||
|
if queue_item.status == "canceled":
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
self._active_queue_items.add(queue_item)
|
||||||
|
# reserve a GPU for this session - may block
|
||||||
|
with self._invoker.services.model_manager.load.ram_cache.reserve_execution_device():
|
||||||
|
# Run the session on the reserved GPU
|
||||||
|
self.session_runner.run(queue_item=queue_item)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
finally:
|
||||||
|
self._active_queue_items.remove(queue_item)
|
||||||
|
|
||||||
def _on_non_fatal_processor_error(
|
def _on_non_fatal_processor_error(
|
||||||
self,
|
self,
|
||||||
queue_item: Optional[SessionQueueItem],
|
queue_item: Optional[SessionQueueItem],
|
||||||
|
|||||||
@@ -236,6 +236,9 @@ class SessionQueueItemWithoutGraph(BaseModel):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def __hash__(self) -> int:
|
||||||
|
return self.item_id
|
||||||
|
|
||||||
|
|
||||||
class SessionQueueItemDTO(SessionQueueItemWithoutGraph):
|
class SessionQueueItemDTO(SessionQueueItemWithoutGraph):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -652,7 +652,7 @@ class Graph(BaseModel):
|
|||||||
output_fields = [get_input_field(self.get_node(e.node_id), e.field) for e in outputs]
|
output_fields = [get_input_field(self.get_node(e.node_id), e.field) for e in outputs]
|
||||||
|
|
||||||
# Input type must be a list
|
# Input type must be a list
|
||||||
if get_origin(input_field) != list:
|
if get_origin(input_field) is not list:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Validate that all outputs match the input type
|
# Validate that all outputs match the input type
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from dataclasses import dataclass
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Callable, Optional, Union
|
from typing import TYPE_CHECKING, Callable, Optional, Union
|
||||||
|
|
||||||
|
import torch
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from pydantic.networks import AnyHttpUrl
|
from pydantic.networks import AnyHttpUrl
|
||||||
from torch import Tensor
|
from torch import Tensor
|
||||||
@@ -26,11 +27,13 @@ from invokeai.backend.model_manager.config import (
|
|||||||
from invokeai.backend.model_manager.load.load_base import LoadedModel, LoadedModelWithoutConfig
|
from invokeai.backend.model_manager.load.load_base import LoadedModel, LoadedModelWithoutConfig
|
||||||
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
|
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
|
||||||
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData
|
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData
|
||||||
|
from invokeai.backend.util.devices import TorchDevice
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from invokeai.app.invocations.baseinvocation import BaseInvocation
|
from invokeai.app.invocations.baseinvocation import BaseInvocation
|
||||||
from invokeai.app.invocations.model import ModelIdentifierField
|
from invokeai.app.invocations.model import ModelIdentifierField
|
||||||
from invokeai.app.services.session_queue.session_queue_common import SessionQueueItem
|
from invokeai.app.services.session_queue.session_queue_common import SessionQueueItem
|
||||||
|
from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The InvocationContext provides access to various services and data about the current invocation.
|
The InvocationContext provides access to various services and data about the current invocation.
|
||||||
@@ -323,7 +326,6 @@ class ConditioningInterface(InvocationContextInterface):
|
|||||||
Returns:
|
Returns:
|
||||||
The loaded conditioning data.
|
The loaded conditioning data.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self._services.conditioning.load(name)
|
return self._services.conditioning.load(name)
|
||||||
|
|
||||||
|
|
||||||
@@ -557,6 +559,28 @@ class UtilInterface(InvocationContextInterface):
|
|||||||
is_canceled=self.is_canceled,
|
is_canceled=self.is_canceled,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def torch_device(self) -> torch.device:
|
||||||
|
"""
|
||||||
|
Return a torch device to use in the current invocation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A torch.device not currently in use by the system.
|
||||||
|
"""
|
||||||
|
ram_cache: "ModelCacheBase[AnyModel]" = self._services.model_manager.load.ram_cache
|
||||||
|
return ram_cache.get_execution_device()
|
||||||
|
|
||||||
|
def torch_dtype(self, device: Optional[torch.device] = None) -> torch.dtype:
|
||||||
|
"""
|
||||||
|
Return a precision type to use with the current invocation and torch device.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
device: Optional device.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A torch.dtype suited for the current device.
|
||||||
|
"""
|
||||||
|
return TorchDevice.choose_torch_dtype(device)
|
||||||
|
|
||||||
|
|
||||||
class InvocationContext:
|
class InvocationContext:
|
||||||
"""Provides access to various services and data for the current invocation.
|
"""Provides access to various services and data for the current invocation.
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ from enum import Enum
|
|||||||
from typing import Literal, Optional, Type, TypeAlias, Union
|
from typing import Literal, Optional, Type, TypeAlias, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from diffusers.configuration_utils import ConfigMixin
|
||||||
from diffusers.models.modeling_utils import ModelMixin
|
from diffusers.models.modeling_utils import ModelMixin
|
||||||
from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag, TypeAdapter
|
from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag, TypeAdapter
|
||||||
from typing_extensions import Annotated, Any, Dict
|
from typing_extensions import Annotated, Any, Dict
|
||||||
@@ -37,7 +38,7 @@ from ..raw_model import RawModel
|
|||||||
|
|
||||||
# ModelMixin is the base class for all diffusers and transformers models
|
# ModelMixin is the base class for all diffusers and transformers models
|
||||||
# RawModel is the InvokeAI wrapper class for ip_adapters, loras, textual_inversion and onnx runtime
|
# RawModel is the InvokeAI wrapper class for ip_adapters, loras, textual_inversion and onnx runtime
|
||||||
AnyModel = Union[ModelMixin, RawModel, torch.nn.Module, Dict[str, torch.Tensor]]
|
AnyModel = Union[ConfigMixin, ModelMixin, RawModel, torch.nn.Module, Dict[str, torch.Tensor]]
|
||||||
|
|
||||||
|
|
||||||
class InvalidModelConfigException(Exception):
|
class InvalidModelConfigException(Exception):
|
||||||
@@ -177,6 +178,7 @@ class ModelConfigBase(BaseModel):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseModel]) -> None:
|
def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseModel]) -> None:
|
||||||
|
"""Extend the pydantic schema from a json."""
|
||||||
schema["required"].extend(["key", "type", "format"])
|
schema["required"].extend(["key", "type", "format"])
|
||||||
|
|
||||||
model_config = ConfigDict(validate_assignment=True, json_schema_extra=json_schema_extra)
|
model_config = ConfigDict(validate_assignment=True, json_schema_extra=json_schema_extra)
|
||||||
@@ -443,7 +445,7 @@ class ModelConfigFactory(object):
|
|||||||
model = dest_class.model_validate(model_data)
|
model = dest_class.model_validate(model_data)
|
||||||
else:
|
else:
|
||||||
# mypy doesn't typecheck TypeAdapters well?
|
# mypy doesn't typecheck TypeAdapters well?
|
||||||
model = AnyModelConfigValidator.validate_python(model_data) # type: ignore
|
model = AnyModelConfigValidator.validate_python(model_data)
|
||||||
assert model is not None
|
assert model is not None
|
||||||
if key:
|
if key:
|
||||||
model.key = key
|
model.key = key
|
||||||
|
|||||||
@@ -65,8 +65,7 @@ class LoadedModelWithoutConfig:
|
|||||||
|
|
||||||
def __enter__(self) -> AnyModel:
|
def __enter__(self) -> AnyModel:
|
||||||
"""Context entry."""
|
"""Context entry."""
|
||||||
self._locker.lock()
|
return self._locker.lock()
|
||||||
return self.model
|
|
||||||
|
|
||||||
def __exit__(self, *args: Any, **kwargs: Any) -> None:
|
def __exit__(self, *args: Any, **kwargs: Any) -> None:
|
||||||
"""Context exit."""
|
"""Context exit."""
|
||||||
|
|||||||
@@ -8,9 +8,10 @@ model will be cleared and (re)loaded from disk when next needed.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from contextlib import contextmanager
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
from typing import Dict, Generic, Optional, TypeVar
|
from typing import Dict, Generator, Generic, Optional, Set, TypeVar
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -51,44 +52,13 @@ class CacheRecord(Generic[T]):
|
|||||||
Elements of the cache:
|
Elements of the cache:
|
||||||
|
|
||||||
key: Unique key for each model, same as used in the models database.
|
key: Unique key for each model, same as used in the models database.
|
||||||
model: Model in memory.
|
model: Read-only copy of the model *without weights* residing in the "meta device"
|
||||||
state_dict: A read-only copy of the model's state dict in RAM. It will be
|
|
||||||
used as a template for creating a copy in the VRAM.
|
|
||||||
size: Size of the model
|
size: Size of the model
|
||||||
loaded: True if the model's state dict is currently in VRAM
|
|
||||||
|
|
||||||
Before a model is executed, the state_dict template is copied into VRAM,
|
|
||||||
and then injected into the model. When the model is finished, the VRAM
|
|
||||||
copy of the state dict is deleted, and the RAM version is reinjected
|
|
||||||
into the model.
|
|
||||||
|
|
||||||
The state_dict should be treated as a read-only attribute. Do not attempt
|
|
||||||
to patch or otherwise modify it. Instead, patch the copy of the state_dict
|
|
||||||
after it is loaded into the execution device (e.g. CUDA) using the `LoadedModel`
|
|
||||||
context manager call `model_on_device()`.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
key: str
|
key: str
|
||||||
model: T
|
|
||||||
device: torch.device
|
|
||||||
state_dict: Optional[Dict[str, torch.Tensor]]
|
|
||||||
size: int
|
size: int
|
||||||
loaded: bool = False
|
model: T
|
||||||
_locks: int = 0
|
|
||||||
|
|
||||||
def lock(self) -> None:
|
|
||||||
"""Lock this record."""
|
|
||||||
self._locks += 1
|
|
||||||
|
|
||||||
def unlock(self) -> None:
|
|
||||||
"""Unlock this record."""
|
|
||||||
self._locks -= 1
|
|
||||||
assert self._locks >= 0
|
|
||||||
|
|
||||||
@property
|
|
||||||
def locked(self) -> bool:
|
|
||||||
"""Return true if record is locked."""
|
|
||||||
return self._locks > 0
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -115,14 +85,27 @@ class ModelCacheBase(ABC, Generic[T]):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def execution_device(self) -> torch.device:
|
def execution_devices(self) -> Set[torch.device]:
|
||||||
"""Return the exection device (e.g. "cuda" for VRAM)."""
|
"""Return the set of available execution devices."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@property
|
@contextmanager
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def lazy_offloading(self) -> bool:
|
def reserve_execution_device(self, timeout: int = 0) -> Generator[torch.device, None, None]:
|
||||||
"""Return true if the cache is configured to lazily offload models in VRAM."""
|
"""Reserve an execution device (GPU) under the current thread id."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_execution_device(self) -> torch.device:
|
||||||
|
"""
|
||||||
|
Return an execution device that has been reserved for current thread.
|
||||||
|
|
||||||
|
Note that reservations are done using the current thread's TID.
|
||||||
|
It might be better to do this using the session ID, but that involves
|
||||||
|
too many detailed changes to model manager calls.
|
||||||
|
|
||||||
|
May generate a ValueError if no GPU has been reserved.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -131,16 +114,6 @@ class ModelCacheBase(ABC, Generic[T]):
|
|||||||
"""Return true if the cache is configured to lazily offload models in VRAM."""
|
"""Return true if the cache is configured to lazily offload models in VRAM."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def offload_unlocked_models(self, size_required: int) -> None:
|
|
||||||
"""Offload from VRAM any models not actively in use."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
|
|
||||||
"""Move model into the indicated device."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def stats(self) -> Optional[CacheStats]:
|
def stats(self) -> Optional[CacheStats]:
|
||||||
@@ -202,6 +175,11 @@ class ModelCacheBase(ABC, Generic[T]):
|
|||||||
"""Return true if the model identified by key and submodel_type is in the cache."""
|
"""Return true if the model identified by key and submodel_type is in the cache."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> AnyModel:
|
||||||
|
"""Move a copy of the model into the indicated device and return it."""
|
||||||
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def cache_size(self) -> int:
|
def cache_size(self) -> int:
|
||||||
"""Get the total size of the models currently cached."""
|
"""Get the total size of the models currently cached."""
|
||||||
|
|||||||
@@ -18,17 +18,19 @@ context. Use like this:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import copy
|
||||||
import gc
|
import gc
|
||||||
import math
|
import sys
|
||||||
import time
|
import threading
|
||||||
from contextlib import suppress
|
from contextlib import contextmanager, suppress
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
from typing import Dict, List, Optional
|
from threading import BoundedSemaphore
|
||||||
|
from typing import Dict, Generator, List, Optional, Set
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from invokeai.backend.model_manager import AnyModel, SubModelType
|
from invokeai.backend.model_manager import AnyModel, SubModelType
|
||||||
from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot, get_pretty_snapshot_diff
|
from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot
|
||||||
from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
|
from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
|
||||||
from invokeai.backend.util.devices import TorchDevice
|
from invokeai.backend.util.devices import TorchDevice
|
||||||
from invokeai.backend.util.logging import InvokeAILogger
|
from invokeai.backend.util.logging import InvokeAILogger
|
||||||
@@ -39,9 +41,7 @@ from .model_locker import ModelLocker
|
|||||||
# Maximum size of the cache, in gigs
|
# Maximum size of the cache, in gigs
|
||||||
# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
|
# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
|
||||||
DEFAULT_MAX_CACHE_SIZE = 6.0
|
DEFAULT_MAX_CACHE_SIZE = 6.0
|
||||||
|
DEFAULT_MAX_VRAM_CACHE_SIZE = 0.25
|
||||||
# amount of GPU memory to hold in reserve for use by generations (GB)
|
|
||||||
DEFAULT_MAX_VRAM_CACHE_SIZE = 2.75
|
|
||||||
|
|
||||||
# actual size of a gig
|
# actual size of a gig
|
||||||
GIG = 1073741824
|
GIG = 1073741824
|
||||||
@@ -57,12 +57,8 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
self,
|
self,
|
||||||
max_cache_size: float = DEFAULT_MAX_CACHE_SIZE,
|
max_cache_size: float = DEFAULT_MAX_CACHE_SIZE,
|
||||||
max_vram_cache_size: float = DEFAULT_MAX_VRAM_CACHE_SIZE,
|
max_vram_cache_size: float = DEFAULT_MAX_VRAM_CACHE_SIZE,
|
||||||
execution_device: torch.device = torch.device("cuda"),
|
|
||||||
storage_device: torch.device = torch.device("cpu"),
|
storage_device: torch.device = torch.device("cpu"),
|
||||||
precision: torch.dtype = torch.float16,
|
precision: torch.dtype = torch.float16,
|
||||||
sequential_offload: bool = False,
|
|
||||||
lazy_offloading: bool = True,
|
|
||||||
sha_chunksize: int = 16777216,
|
|
||||||
log_memory_usage: bool = False,
|
log_memory_usage: bool = False,
|
||||||
logger: Optional[Logger] = None,
|
logger: Optional[Logger] = None,
|
||||||
):
|
):
|
||||||
@@ -70,23 +66,19 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
Initialize the model RAM cache.
|
Initialize the model RAM cache.
|
||||||
|
|
||||||
:param max_cache_size: Maximum size of the RAM cache [6.0 GB]
|
:param max_cache_size: Maximum size of the RAM cache [6.0 GB]
|
||||||
:param execution_device: Torch device to load active model into [torch.device('cuda')]
|
|
||||||
:param storage_device: Torch device to save inactive model in [torch.device('cpu')]
|
:param storage_device: Torch device to save inactive model in [torch.device('cpu')]
|
||||||
:param precision: Precision for loaded models [torch.float16]
|
:param precision: Precision for loaded models [torch.float16]
|
||||||
:param lazy_offloading: Keep model in VRAM until another model needs to be loaded
|
|
||||||
:param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially
|
:param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially
|
||||||
:param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
|
:param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
|
||||||
operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
|
operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
|
||||||
snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
|
snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
|
||||||
behaviour.
|
behaviour.
|
||||||
"""
|
"""
|
||||||
# allow lazy offloading only when vram cache enabled
|
|
||||||
self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
|
|
||||||
self._precision: torch.dtype = precision
|
self._precision: torch.dtype = precision
|
||||||
self._max_cache_size: float = max_cache_size
|
self._max_cache_size: float = max_cache_size
|
||||||
self._max_vram_cache_size: float = max_vram_cache_size
|
self._max_vram_cache_size: float = max_vram_cache_size
|
||||||
self._execution_device: torch.device = execution_device
|
|
||||||
self._storage_device: torch.device = storage_device
|
self._storage_device: torch.device = storage_device
|
||||||
|
self._ram_lock = threading.Lock()
|
||||||
self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__)
|
self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__)
|
||||||
self._log_memory_usage = log_memory_usage
|
self._log_memory_usage = log_memory_usage
|
||||||
self._stats: Optional[CacheStats] = None
|
self._stats: Optional[CacheStats] = None
|
||||||
@@ -94,25 +86,87 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
self._cached_models: Dict[str, CacheRecord[AnyModel]] = {}
|
self._cached_models: Dict[str, CacheRecord[AnyModel]] = {}
|
||||||
self._cache_stack: List[str] = []
|
self._cache_stack: List[str] = []
|
||||||
|
|
||||||
|
# device to thread id
|
||||||
|
self._device_lock = threading.Lock()
|
||||||
|
self._execution_devices: Dict[torch.device, int] = {x: 0 for x in TorchDevice.execution_devices()}
|
||||||
|
self._free_execution_device = BoundedSemaphore(len(self._execution_devices))
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f"Using rendering device(s): {', '.join(sorted([str(x) for x in self._execution_devices.keys()]))}"
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def logger(self) -> Logger:
|
def logger(self) -> Logger:
|
||||||
"""Return the logger used by the cache."""
|
"""Return the logger used by the cache."""
|
||||||
return self._logger
|
return self._logger
|
||||||
|
|
||||||
@property
|
|
||||||
def lazy_offloading(self) -> bool:
|
|
||||||
"""Return true if the cache is configured to lazily offload models in VRAM."""
|
|
||||||
return self._lazy_offloading
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def storage_device(self) -> torch.device:
|
def storage_device(self) -> torch.device:
|
||||||
"""Return the storage device (e.g. "CPU" for RAM)."""
|
"""Return the storage device (e.g. "CPU" for RAM)."""
|
||||||
return self._storage_device
|
return self._storage_device
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def execution_device(self) -> torch.device:
|
def execution_devices(self) -> Set[torch.device]:
|
||||||
"""Return the exection device (e.g. "cuda" for VRAM)."""
|
"""Return the set of available execution devices."""
|
||||||
return self._execution_device
|
devices = self._execution_devices.keys()
|
||||||
|
return set(devices)
|
||||||
|
|
||||||
|
def get_execution_device(self) -> torch.device:
|
||||||
|
"""
|
||||||
|
Return an execution device that has been reserved for current thread.
|
||||||
|
|
||||||
|
Note that reservations are done using the current thread's TID.
|
||||||
|
It would be better to do this using the session ID, but that involves
|
||||||
|
too many detailed changes to model manager calls.
|
||||||
|
|
||||||
|
May generate a ValueError if no GPU has been reserved.
|
||||||
|
"""
|
||||||
|
current_thread = threading.current_thread().ident
|
||||||
|
assert current_thread is not None
|
||||||
|
assigned = [x for x, tid in self._execution_devices.items() if current_thread == tid]
|
||||||
|
if not assigned:
|
||||||
|
raise ValueError(f"No GPU has been reserved for the use of thread {current_thread}")
|
||||||
|
return assigned[0]
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def reserve_execution_device(self, timeout: Optional[int] = None) -> Generator[torch.device, None, None]:
|
||||||
|
"""Reserve an execution device (e.g. GPU) for exclusive use by a generation thread.
|
||||||
|
|
||||||
|
Note that the reservation is done using the current thread's TID.
|
||||||
|
It would be better to do this using the session ID, but that involves
|
||||||
|
too many detailed changes to model manager calls.
|
||||||
|
"""
|
||||||
|
device = None
|
||||||
|
with self._device_lock:
|
||||||
|
current_thread = threading.current_thread().ident
|
||||||
|
assert current_thread is not None
|
||||||
|
|
||||||
|
# look for a device that has already been assigned to this thread
|
||||||
|
assigned = [x for x, tid in self._execution_devices.items() if current_thread == tid]
|
||||||
|
if assigned:
|
||||||
|
device = assigned[0]
|
||||||
|
|
||||||
|
# no device already assigned. Get one.
|
||||||
|
if device is None:
|
||||||
|
self._free_execution_device.acquire(timeout=timeout)
|
||||||
|
with self._device_lock:
|
||||||
|
free_device = [x for x, tid in self._execution_devices.items() if tid == 0]
|
||||||
|
self._execution_devices[free_device[0]] = current_thread
|
||||||
|
device = free_device[0]
|
||||||
|
|
||||||
|
# we are outside the lock region now
|
||||||
|
self.logger.info(f"{current_thread} Reserved torch device {device}")
|
||||||
|
|
||||||
|
# Tell TorchDevice to use this object to get the torch device.
|
||||||
|
TorchDevice.set_model_cache(self)
|
||||||
|
try:
|
||||||
|
yield device
|
||||||
|
finally:
|
||||||
|
with self._device_lock:
|
||||||
|
self.logger.info(f"{current_thread} Released torch device {device}")
|
||||||
|
self._execution_devices[device] = 0
|
||||||
|
self._free_execution_device.release()
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def max_cache_size(self) -> float:
|
def max_cache_size(self) -> float:
|
||||||
@@ -157,16 +211,16 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
submodel_type: Optional[SubModelType] = None,
|
submodel_type: Optional[SubModelType] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Store model under key and optional submodel_type."""
|
"""Store model under key and optional submodel_type."""
|
||||||
key = self._make_cache_key(key, submodel_type)
|
with self._ram_lock:
|
||||||
if key in self._cached_models:
|
key = self._make_cache_key(key, submodel_type)
|
||||||
return
|
if key in self._cached_models:
|
||||||
size = calc_model_size_by_data(model)
|
return
|
||||||
self.make_room(size)
|
size = calc_model_size_by_data(model)
|
||||||
|
self.make_room(size)
|
||||||
|
|
||||||
state_dict = model.state_dict() if isinstance(model, torch.nn.Module) else None
|
cache_record = CacheRecord(key=key, model=model, size=size)
|
||||||
cache_record = CacheRecord(key=key, model=model, device=self.storage_device, state_dict=state_dict, size=size)
|
self._cached_models[key] = cache_record
|
||||||
self._cached_models[key] = cache_record
|
self._cache_stack.append(key)
|
||||||
self._cache_stack.append(key)
|
|
||||||
|
|
||||||
def get(
|
def get(
|
||||||
self,
|
self,
|
||||||
@@ -184,36 +238,37 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
|
|
||||||
This may raise an IndexError if the model is not in the cache.
|
This may raise an IndexError if the model is not in the cache.
|
||||||
"""
|
"""
|
||||||
key = self._make_cache_key(key, submodel_type)
|
with self._ram_lock:
|
||||||
if key in self._cached_models:
|
key = self._make_cache_key(key, submodel_type)
|
||||||
if self.stats:
|
if key in self._cached_models:
|
||||||
self.stats.hits += 1
|
if self.stats:
|
||||||
else:
|
self.stats.hits += 1
|
||||||
if self.stats:
|
else:
|
||||||
self.stats.misses += 1
|
if self.stats:
|
||||||
raise IndexError(f"The model with key {key} is not in the cache.")
|
self.stats.misses += 1
|
||||||
|
raise IndexError(f"The model with key {key} is not in the cache.")
|
||||||
|
|
||||||
cache_entry = self._cached_models[key]
|
cache_entry = self._cached_models[key]
|
||||||
|
|
||||||
# more stats
|
# more stats
|
||||||
if self.stats:
|
if self.stats:
|
||||||
stats_name = stats_name or key
|
stats_name = stats_name or key
|
||||||
self.stats.cache_size = int(self._max_cache_size * GIG)
|
self.stats.cache_size = int(self._max_cache_size * GIG)
|
||||||
self.stats.high_watermark = max(self.stats.high_watermark, self.cache_size())
|
self.stats.high_watermark = max(self.stats.high_watermark, self.cache_size())
|
||||||
self.stats.in_cache = len(self._cached_models)
|
self.stats.in_cache = len(self._cached_models)
|
||||||
self.stats.loaded_model_sizes[stats_name] = max(
|
self.stats.loaded_model_sizes[stats_name] = max(
|
||||||
self.stats.loaded_model_sizes.get(stats_name, 0), cache_entry.size
|
self.stats.loaded_model_sizes.get(stats_name, 0), cache_entry.size
|
||||||
|
)
|
||||||
|
|
||||||
|
# this moves the entry to the top (right end) of the stack
|
||||||
|
with suppress(Exception):
|
||||||
|
self._cache_stack.remove(key)
|
||||||
|
self._cache_stack.append(key)
|
||||||
|
return ModelLocker(
|
||||||
|
cache=self,
|
||||||
|
cache_entry=cache_entry,
|
||||||
)
|
)
|
||||||
|
|
||||||
# this moves the entry to the top (right end) of the stack
|
|
||||||
with suppress(Exception):
|
|
||||||
self._cache_stack.remove(key)
|
|
||||||
self._cache_stack.append(key)
|
|
||||||
return ModelLocker(
|
|
||||||
cache=self,
|
|
||||||
cache_entry=cache_entry,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _capture_memory_snapshot(self) -> Optional[MemorySnapshot]:
|
def _capture_memory_snapshot(self) -> Optional[MemorySnapshot]:
|
||||||
if self._log_memory_usage:
|
if self._log_memory_usage:
|
||||||
return MemorySnapshot.capture()
|
return MemorySnapshot.capture()
|
||||||
@@ -225,127 +280,34 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
else:
|
else:
|
||||||
return model_key
|
return model_key
|
||||||
|
|
||||||
def offload_unlocked_models(self, size_required: int) -> None:
|
def model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> AnyModel:
|
||||||
"""Move any unused models from VRAM."""
|
"""Move a copy of the model into the indicated device and return it.
|
||||||
reserved = self._max_vram_cache_size * GIG
|
|
||||||
vram_in_use = torch.cuda.memory_allocated() + size_required
|
|
||||||
self.logger.debug(f"{(vram_in_use/GIG):.2f}GB VRAM needed for models; max allowed={(reserved/GIG):.2f}GB")
|
|
||||||
for _, cache_entry in sorted(self._cached_models.items(), key=lambda x: x[1].size):
|
|
||||||
if vram_in_use <= reserved:
|
|
||||||
break
|
|
||||||
if not cache_entry.loaded:
|
|
||||||
continue
|
|
||||||
if not cache_entry.locked:
|
|
||||||
self.move_model_to_device(cache_entry, self.storage_device)
|
|
||||||
cache_entry.loaded = False
|
|
||||||
vram_in_use = torch.cuda.memory_allocated() + size_required
|
|
||||||
self.logger.debug(
|
|
||||||
f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GIG):.2f}GB; vram free = {(torch.cuda.memory_allocated()/GIG):.2f}GB"
|
|
||||||
)
|
|
||||||
|
|
||||||
TorchDevice.empty_cache()
|
|
||||||
|
|
||||||
def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
|
|
||||||
"""Move model into the indicated device.
|
|
||||||
|
|
||||||
:param cache_entry: The CacheRecord for the model
|
:param cache_entry: The CacheRecord for the model
|
||||||
:param target_device: The torch.device to move the model into
|
:param target_device: The torch.device to move the model into
|
||||||
|
|
||||||
May raise a torch.cuda.OutOfMemoryError
|
May raise a torch.cuda.OutOfMemoryError
|
||||||
"""
|
"""
|
||||||
self.logger.debug(f"Called to move {cache_entry.key} to {target_device}")
|
with self._ram_lock:
|
||||||
source_device = cache_entry.device
|
self.logger.debug(f"Called to move {cache_entry.key} ({type(cache_entry.model)=}) to {target_device}")
|
||||||
|
|
||||||
# Note: We compare device types only so that 'cuda' == 'cuda:0'.
|
# Some models don't have a state dictionary, in which case the
|
||||||
# This would need to be revised to support multi-GPU.
|
# stored model will still reside in CPU
|
||||||
if torch.device(source_device).type == torch.device(target_device).type:
|
if hasattr(cache_entry.model, "to"):
|
||||||
return
|
model_in_gpu = copy.deepcopy(cache_entry.model)
|
||||||
|
assert hasattr(model_in_gpu, "to")
|
||||||
# Some models don't have a `to` method, in which case they run in RAM/CPU.
|
model_in_gpu.to(target_device)
|
||||||
if not hasattr(cache_entry.model, "to"):
|
return model_in_gpu
|
||||||
return
|
else:
|
||||||
|
return cache_entry.model # what happens in CPU stays in CPU
|
||||||
# This roundabout method for moving the model around is done to avoid
|
|
||||||
# the cost of moving the model from RAM to VRAM and then back from VRAM to RAM.
|
|
||||||
# When moving to VRAM, we copy (not move) each element of the state dict from
|
|
||||||
# RAM to a new state dict in VRAM, and then inject it into the model.
|
|
||||||
# This operation is slightly faster than running `to()` on the whole model.
|
|
||||||
#
|
|
||||||
# When the model needs to be removed from VRAM we simply delete the copy
|
|
||||||
# of the state dict in VRAM, and reinject the state dict that is cached
|
|
||||||
# in RAM into the model. So this operation is very fast.
|
|
||||||
start_model_to_time = time.time()
|
|
||||||
snapshot_before = self._capture_memory_snapshot()
|
|
||||||
|
|
||||||
try:
|
|
||||||
if cache_entry.state_dict is not None:
|
|
||||||
assert hasattr(cache_entry.model, "load_state_dict")
|
|
||||||
if target_device == self.storage_device:
|
|
||||||
cache_entry.model.load_state_dict(cache_entry.state_dict, assign=True)
|
|
||||||
else:
|
|
||||||
new_dict: Dict[str, torch.Tensor] = {}
|
|
||||||
for k, v in cache_entry.state_dict.items():
|
|
||||||
new_dict[k] = v.to(torch.device(target_device), copy=True, non_blocking=True)
|
|
||||||
cache_entry.model.load_state_dict(new_dict, assign=True)
|
|
||||||
cache_entry.model.to(target_device, non_blocking=True)
|
|
||||||
cache_entry.device = target_device
|
|
||||||
except Exception as e: # blow away cache entry
|
|
||||||
self._delete_cache_entry(cache_entry)
|
|
||||||
raise e
|
|
||||||
|
|
||||||
snapshot_after = self._capture_memory_snapshot()
|
|
||||||
end_model_to_time = time.time()
|
|
||||||
self.logger.debug(
|
|
||||||
f"Moved model '{cache_entry.key}' from {source_device} to"
|
|
||||||
f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s."
|
|
||||||
f"Estimated model size: {(cache_entry.size/GIG):.3f} GB."
|
|
||||||
f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if (
|
|
||||||
snapshot_before is not None
|
|
||||||
and snapshot_after is not None
|
|
||||||
and snapshot_before.vram is not None
|
|
||||||
and snapshot_after.vram is not None
|
|
||||||
):
|
|
||||||
vram_change = abs(snapshot_before.vram - snapshot_after.vram)
|
|
||||||
|
|
||||||
# If the estimated model size does not match the change in VRAM, log a warning.
|
|
||||||
if not math.isclose(
|
|
||||||
vram_change,
|
|
||||||
cache_entry.size,
|
|
||||||
rel_tol=0.1,
|
|
||||||
abs_tol=10 * MB,
|
|
||||||
):
|
|
||||||
self.logger.debug(
|
|
||||||
f"Moving model '{cache_entry.key}' from {source_device} to"
|
|
||||||
f" {target_device} caused an unexpected change in VRAM usage. The model's"
|
|
||||||
" estimated size may be incorrect. Estimated model size:"
|
|
||||||
f" {(cache_entry.size/GIG):.3f} GB.\n"
|
|
||||||
f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def print_cuda_stats(self) -> None:
|
def print_cuda_stats(self) -> None:
|
||||||
"""Log CUDA diagnostics."""
|
"""Log CUDA diagnostics."""
|
||||||
vram = "%4.2fG" % (torch.cuda.memory_allocated() / GIG)
|
vram = "%4.2fG" % (torch.cuda.memory_allocated() / GIG)
|
||||||
ram = "%4.2fG" % (self.cache_size() / GIG)
|
ram = "%4.2fG" % (self.cache_size() / GIG)
|
||||||
|
|
||||||
in_ram_models = 0
|
in_ram_models = len(self._cached_models)
|
||||||
in_vram_models = 0
|
self.logger.debug(f"Current VRAM/RAM usage for {in_ram_models} models: {vram}/{ram}")
|
||||||
locked_in_vram_models = 0
|
|
||||||
for cache_record in self._cached_models.values():
|
|
||||||
if hasattr(cache_record.model, "device"):
|
|
||||||
if cache_record.model.device == self.storage_device:
|
|
||||||
in_ram_models += 1
|
|
||||||
else:
|
|
||||||
in_vram_models += 1
|
|
||||||
if cache_record.locked:
|
|
||||||
locked_in_vram_models += 1
|
|
||||||
|
|
||||||
self.logger.debug(
|
|
||||||
f"Current VRAM/RAM usage: {vram}/{ram}; models_in_ram/models_in_vram(locked) ="
|
|
||||||
f" {in_ram_models}/{in_vram_models}({locked_in_vram_models})"
|
|
||||||
)
|
|
||||||
|
|
||||||
def make_room(self, size: int) -> None:
|
def make_room(self, size: int) -> None:
|
||||||
"""Make enough room in the cache to accommodate a new model of indicated size."""
|
"""Make enough room in the cache to accommodate a new model of indicated size."""
|
||||||
@@ -368,12 +330,14 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
while current_size + bytes_needed > maximum_size and pos < len(self._cache_stack):
|
while current_size + bytes_needed > maximum_size and pos < len(self._cache_stack):
|
||||||
model_key = self._cache_stack[pos]
|
model_key = self._cache_stack[pos]
|
||||||
cache_entry = self._cached_models[model_key]
|
cache_entry = self._cached_models[model_key]
|
||||||
device = cache_entry.model.device if hasattr(cache_entry.model, "device") else None
|
|
||||||
self.logger.debug(
|
|
||||||
f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if not cache_entry.locked:
|
refs = sys.getrefcount(cache_entry.model)
|
||||||
|
|
||||||
|
# Expected refs:
|
||||||
|
# 1 from cache_entry
|
||||||
|
# 1 from getrefcount function
|
||||||
|
# 1 from onnx runtime object
|
||||||
|
if refs <= (3 if "onnx" in model_key else 2):
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
f"Removing {model_key} from RAM cache to free at least {(size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)"
|
f"Removing {model_key} from RAM cache to free at least {(size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)"
|
||||||
)
|
)
|
||||||
@@ -400,10 +364,26 @@ class ModelCache(ModelCacheBase[AnyModel]):
|
|||||||
if self.stats:
|
if self.stats:
|
||||||
self.stats.cleared = models_cleared
|
self.stats.cleared = models_cleared
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
|
||||||
TorchDevice.empty_cache()
|
TorchDevice.empty_cache()
|
||||||
self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
|
self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
|
||||||
|
|
||||||
|
def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
|
||||||
|
if target_device.type != "cuda":
|
||||||
|
return
|
||||||
|
vram_device = ( # mem_get_info() needs an indexed device
|
||||||
|
target_device if target_device.index is not None else torch.device(str(target_device), index=0)
|
||||||
|
)
|
||||||
|
free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
|
||||||
|
if needed_size > free_mem:
|
||||||
|
raise torch.cuda.OutOfMemoryError
|
||||||
|
|
||||||
def _delete_cache_entry(self, cache_entry: CacheRecord[AnyModel]) -> None:
|
def _delete_cache_entry(self, cache_entry: CacheRecord[AnyModel]) -> None:
|
||||||
self._cache_stack.remove(cache_entry.key)
|
try:
|
||||||
del self._cached_models[cache_entry.key]
|
self._cache_stack.remove(cache_entry.key)
|
||||||
|
del self._cached_models[cache_entry.key]
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _device_name(device: torch.device) -> str:
|
||||||
|
return f"{device.type}:{device.index}"
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ from invokeai.backend.model_manager import AnyModel
|
|||||||
|
|
||||||
from .model_cache_base import CacheRecord, ModelCacheBase, ModelLockerBase
|
from .model_cache_base import CacheRecord, ModelCacheBase, ModelLockerBase
|
||||||
|
|
||||||
|
MAX_GPU_WAIT = 600 # wait up to 10 minutes for a GPU to become free
|
||||||
|
|
||||||
|
|
||||||
class ModelLocker(ModelLockerBase):
|
class ModelLocker(ModelLockerBase):
|
||||||
"""Internal class that mediates movement in and out of GPU."""
|
"""Internal class that mediates movement in and out of GPU."""
|
||||||
@@ -29,33 +31,29 @@ class ModelLocker(ModelLockerBase):
|
|||||||
"""Return the model without moving it around."""
|
"""Return the model without moving it around."""
|
||||||
return self._cache_entry.model
|
return self._cache_entry.model
|
||||||
|
|
||||||
def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
|
|
||||||
"""Return the state dict (if any) for the cached model."""
|
|
||||||
return self._cache_entry.state_dict
|
|
||||||
|
|
||||||
def lock(self) -> AnyModel:
|
def lock(self) -> AnyModel:
|
||||||
"""Move the model into the execution device (GPU) and lock it."""
|
"""Move the model into the execution device (GPU) and lock it."""
|
||||||
self._cache_entry.lock()
|
|
||||||
try:
|
try:
|
||||||
if self._cache.lazy_offloading:
|
device = self._cache.get_execution_device()
|
||||||
self._cache.offload_unlocked_models(self._cache_entry.size)
|
model_on_device = self._cache.model_to_device(self._cache_entry, device)
|
||||||
self._cache.move_model_to_device(self._cache_entry, self._cache.execution_device)
|
self._cache.logger.debug(f"Moved {self._cache_entry.key} to {device}")
|
||||||
self._cache_entry.loaded = True
|
|
||||||
self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
|
|
||||||
self._cache.print_cuda_stats()
|
self._cache.print_cuda_stats()
|
||||||
except torch.cuda.OutOfMemoryError:
|
except torch.cuda.OutOfMemoryError:
|
||||||
self._cache.logger.warning("Insufficient GPU memory to load model. Aborting")
|
self._cache.logger.warning("Insufficient GPU memory to load model. Aborting")
|
||||||
self._cache_entry.unlock()
|
|
||||||
raise
|
raise
|
||||||
except Exception:
|
except Exception:
|
||||||
self._cache_entry.unlock()
|
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return self.model
|
return model_on_device
|
||||||
|
|
||||||
|
# It is no longer necessary to move the model out of VRAM
|
||||||
|
# because it will be removed when it goes out of scope
|
||||||
|
# in the caller's context
|
||||||
def unlock(self) -> None:
|
def unlock(self) -> None:
|
||||||
"""Call upon exit from context."""
|
"""Call upon exit from context."""
|
||||||
self._cache_entry.unlock()
|
self._cache.print_cuda_stats()
|
||||||
if not self._cache.lazy_offloading:
|
|
||||||
self._cache.offload_unlocked_models(0)
|
# This is no longer in use in MGPU.
|
||||||
self._cache.print_cuda_stats()
|
def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
|
||||||
|
"""Return the state dict (if any) for the cached model."""
|
||||||
|
return None
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
|
import threading
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
|
from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
|
||||||
|
|
||||||
@@ -34,6 +35,8 @@ with LoRAHelper.apply_lora_unet(unet, loras):
|
|||||||
|
|
||||||
# TODO: rename smth like ModelPatcher and add TI method?
|
# TODO: rename smth like ModelPatcher and add TI method?
|
||||||
class ModelPatcher:
|
class ModelPatcher:
|
||||||
|
_thread_lock = threading.Lock()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
|
def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
|
||||||
assert "." not in lora_key
|
assert "." not in lora_key
|
||||||
@@ -106,7 +109,7 @@ class ModelPatcher:
|
|||||||
"""
|
"""
|
||||||
original_weights = {}
|
original_weights = {}
|
||||||
try:
|
try:
|
||||||
with torch.no_grad():
|
with torch.no_grad(), cls._thread_lock:
|
||||||
for lora, lora_weight in loras:
|
for lora, lora_weight in loras:
|
||||||
# assert lora.device.type == "cpu"
|
# assert lora.device.type == "cpu"
|
||||||
for layer_key, layer in lora.layers.items():
|
for layer_key, layer in lora.layers.items():
|
||||||
@@ -129,9 +132,7 @@ class ModelPatcher:
|
|||||||
dtype = module.weight.dtype
|
dtype = module.weight.dtype
|
||||||
|
|
||||||
if module_key not in original_weights:
|
if module_key not in original_weights:
|
||||||
if model_state_dict is not None: # we were provided with the CPU copy of the state dict
|
if model_state_dict is None: # no CPU copy of the state dict was provided
|
||||||
original_weights[module_key] = model_state_dict[module_key + ".weight"]
|
|
||||||
else:
|
|
||||||
original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
|
original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
|
||||||
|
|
||||||
layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
|
layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
|
||||||
|
|||||||
@@ -32,8 +32,11 @@ class SDXLConditioningInfo(BasicConditioningInfo):
|
|||||||
|
|
||||||
def to(self, device, dtype=None):
|
def to(self, device, dtype=None):
|
||||||
self.pooled_embeds = self.pooled_embeds.to(device=device, dtype=dtype)
|
self.pooled_embeds = self.pooled_embeds.to(device=device, dtype=dtype)
|
||||||
|
assert self.pooled_embeds.device == device
|
||||||
self.add_time_ids = self.add_time_ids.to(device=device, dtype=dtype)
|
self.add_time_ids = self.add_time_ids.to(device=device, dtype=dtype)
|
||||||
return super().to(device=device, dtype=dtype)
|
result = super().to(device=device, dtype=dtype)
|
||||||
|
assert self.embeds.device == device
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
import threading
|
||||||
from typing import Any, Callable, Optional, Union
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
@@ -293,24 +294,31 @@ class InvokeAIDiffuserComponent:
|
|||||||
cross_attention_kwargs["regional_ip_data"] = regional_ip_data
|
cross_attention_kwargs["regional_ip_data"] = regional_ip_data
|
||||||
|
|
||||||
added_cond_kwargs = None
|
added_cond_kwargs = None
|
||||||
if conditioning_data.is_sdxl():
|
try:
|
||||||
added_cond_kwargs = {
|
if conditioning_data.is_sdxl():
|
||||||
"text_embeds": torch.cat(
|
# tid = threading.current_thread().ident
|
||||||
[
|
# print(f'DEBUG {tid} {conditioning_data.uncond_text.pooled_embeds.device=} {conditioning_data.cond_text.pooled_embeds.device=}', flush=True),
|
||||||
# TODO: how to pad? just by zeros? or even truncate?
|
added_cond_kwargs = {
|
||||||
conditioning_data.uncond_text.pooled_embeds,
|
"text_embeds": torch.cat(
|
||||||
conditioning_data.cond_text.pooled_embeds,
|
[
|
||||||
],
|
# TODO: how to pad? just by zeros? or even truncate?
|
||||||
dim=0,
|
conditioning_data.uncond_text.pooled_embeds,
|
||||||
),
|
conditioning_data.cond_text.pooled_embeds,
|
||||||
"time_ids": torch.cat(
|
],
|
||||||
[
|
dim=0,
|
||||||
conditioning_data.uncond_text.add_time_ids,
|
),
|
||||||
conditioning_data.cond_text.add_time_ids,
|
"time_ids": torch.cat(
|
||||||
],
|
[
|
||||||
dim=0,
|
conditioning_data.uncond_text.add_time_ids,
|
||||||
),
|
conditioning_data.cond_text.add_time_ids,
|
||||||
}
|
],
|
||||||
|
dim=0,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
tid = threading.current_thread().ident
|
||||||
|
print(f"DEBUG: {tid} {str(e)}")
|
||||||
|
raise e
|
||||||
|
|
||||||
if conditioning_data.cond_regions is not None or conditioning_data.uncond_regions is not None:
|
if conditioning_data.cond_regions is not None or conditioning_data.uncond_regions is not None:
|
||||||
# TODO(ryand): We currently initialize RegionalPromptData for every denoising step. The text conditionings
|
# TODO(ryand): We currently initialize RegionalPromptData for every denoising step. The text conditionings
|
||||||
|
|||||||
@@ -1,10 +1,16 @@
|
|||||||
from typing import Dict, Literal, Optional, Union
|
"""Torch Device class provides torch device selection services."""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Dict, Literal, Optional, Set, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from deprecated import deprecated
|
from deprecated import deprecated
|
||||||
|
|
||||||
from invokeai.app.services.config.config_default import get_config
|
from invokeai.app.services.config.config_default import get_config
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from invokeai.backend.model_manager.config import AnyModel
|
||||||
|
from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
|
||||||
|
|
||||||
# legacy APIs
|
# legacy APIs
|
||||||
TorchPrecisionNames = Literal["float32", "float16", "bfloat16"]
|
TorchPrecisionNames = Literal["float32", "float16", "bfloat16"]
|
||||||
CPU_DEVICE = torch.device("cpu")
|
CPU_DEVICE = torch.device("cpu")
|
||||||
@@ -42,9 +48,23 @@ PRECISION_TO_NAME: Dict[torch.dtype, TorchPrecisionNames] = {v: k for k, v in NA
|
|||||||
class TorchDevice:
|
class TorchDevice:
|
||||||
"""Abstraction layer for torch devices."""
|
"""Abstraction layer for torch devices."""
|
||||||
|
|
||||||
|
_model_cache: Optional["ModelCacheBase[AnyModel]"] = None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def set_model_cache(cls, cache: "ModelCacheBase[AnyModel]"):
|
||||||
|
"""Set the current model cache."""
|
||||||
|
cls._model_cache = cache
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def choose_torch_device(cls) -> torch.device:
|
def choose_torch_device(cls) -> torch.device:
|
||||||
"""Return the torch.device to use for accelerated inference."""
|
"""Return the torch.device to use for accelerated inference."""
|
||||||
|
if cls._model_cache:
|
||||||
|
return cls._model_cache.get_execution_device()
|
||||||
|
else:
|
||||||
|
return cls._choose_device()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _choose_device(cls) -> torch.device:
|
||||||
app_config = get_config()
|
app_config = get_config()
|
||||||
if app_config.device != "auto":
|
if app_config.device != "auto":
|
||||||
device = torch.device(app_config.device)
|
device = torch.device(app_config.device)
|
||||||
@@ -56,11 +76,19 @@ class TorchDevice:
|
|||||||
device = CPU_DEVICE
|
device = CPU_DEVICE
|
||||||
return cls.normalize(device)
|
return cls.normalize(device)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def execution_devices(cls) -> Set[torch.device]:
|
||||||
|
"""Return a list of torch.devices that can be used for accelerated inference."""
|
||||||
|
app_config = get_config()
|
||||||
|
if app_config.devices is None:
|
||||||
|
return cls._lookup_execution_devices()
|
||||||
|
return {torch.device(x) for x in app_config.devices}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def choose_torch_dtype(cls, device: Optional[torch.device] = None) -> torch.dtype:
|
def choose_torch_dtype(cls, device: Optional[torch.device] = None) -> torch.dtype:
|
||||||
"""Return the precision to use for accelerated inference."""
|
"""Return the precision to use for accelerated inference."""
|
||||||
device = device or cls.choose_torch_device()
|
|
||||||
config = get_config()
|
config = get_config()
|
||||||
|
device = device or cls._choose_device()
|
||||||
if device.type == "cuda" and torch.cuda.is_available():
|
if device.type == "cuda" and torch.cuda.is_available():
|
||||||
device_name = torch.cuda.get_device_name(device)
|
device_name = torch.cuda.get_device_name(device)
|
||||||
if "GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name:
|
if "GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name:
|
||||||
@@ -108,3 +136,13 @@ class TorchDevice:
|
|||||||
@classmethod
|
@classmethod
|
||||||
def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
|
def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
|
||||||
return NAME_TO_PRECISION[precision_name]
|
return NAME_TO_PRECISION[precision_name]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _lookup_execution_devices(cls) -> Set[torch.device]:
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
devices = {torch.device(f"cuda:{x}") for x in range(0, torch.cuda.device_count())}
|
||||||
|
elif torch.backends.mps.is_available():
|
||||||
|
devices = {torch.device("mps")}
|
||||||
|
else:
|
||||||
|
devices = {torch.device("cpu")}
|
||||||
|
return devices
|
||||||
|
|||||||
54
scripts/populate_model_db_from_yaml.py
Executable file
54
scripts/populate_model_db_from_yaml.py
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/env python
|
||||||
|
|
||||||
|
from argparse import ArgumentParser, Namespace
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from invokeai.app.services.config import InvokeAIAppConfig, get_config
|
||||||
|
from invokeai.app.services.download import DownloadQueueService
|
||||||
|
from invokeai.app.services.model_install import ModelInstallService
|
||||||
|
from invokeai.app.services.model_records import ModelRecordServiceSQL
|
||||||
|
from invokeai.app.services.shared.sqlite.sqlite_database import SqliteDatabase
|
||||||
|
from invokeai.backend.util.logging import InvokeAILogger
|
||||||
|
|
||||||
|
|
||||||
|
def get_args() -> Namespace:
|
||||||
|
parser = ArgumentParser(description="Update models database from yaml file")
|
||||||
|
parser.add_argument("--root", type=Path, required=False, default=None)
|
||||||
|
parser.add_argument("--yaml_file", type=Path, required=False, default=None)
|
||||||
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def populate_config() -> InvokeAIAppConfig:
|
||||||
|
args = get_args()
|
||||||
|
config = get_config()
|
||||||
|
if args.root:
|
||||||
|
config._root = args.root
|
||||||
|
if args.yaml_file:
|
||||||
|
config.legacy_models_yaml_path = args.yaml_file
|
||||||
|
else:
|
||||||
|
config.legacy_models_yaml_path = config.root_path / "configs/models.yaml"
|
||||||
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_installer(config: InvokeAIAppConfig) -> ModelInstallService:
|
||||||
|
logger = InvokeAILogger.get_logger(config=config)
|
||||||
|
db = SqliteDatabase(config.db_path, logger)
|
||||||
|
record_store = ModelRecordServiceSQL(db)
|
||||||
|
queue = DownloadQueueService()
|
||||||
|
queue.start()
|
||||||
|
installer = ModelInstallService(app_config=config, record_store=record_store, download_queue=queue)
|
||||||
|
return installer
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
config = populate_config()
|
||||||
|
installer = initialize_installer(config)
|
||||||
|
installer._migrate_yaml(rename_yaml=False, overwrite_db=True)
|
||||||
|
print("\n<INSTALLED MODELS>")
|
||||||
|
print("\t".join(["key", "name", "type", "path"]))
|
||||||
|
for model in installer.record_store.all_models():
|
||||||
|
print("\t".join([model.key, model.name, model.type, (config.models_path / model.path).as_posix()]))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -14,13 +14,14 @@ def test_loading(mm2_model_manager: ModelManagerServiceBase, embedding_file: Pat
|
|||||||
matches = store.search_by_attr(model_name="test_embedding")
|
matches = store.search_by_attr(model_name="test_embedding")
|
||||||
assert len(matches) == 0
|
assert len(matches) == 0
|
||||||
key = mm2_model_manager.install.register_path(embedding_file)
|
key = mm2_model_manager.install.register_path(embedding_file)
|
||||||
loaded_model = mm2_model_manager.load.load_model(store.get_model(key))
|
with mm2_model_manager.load.ram_cache.reserve_execution_device():
|
||||||
assert loaded_model is not None
|
loaded_model = mm2_model_manager.load.load_model(store.get_model(key))
|
||||||
assert loaded_model.config.key == key
|
assert loaded_model is not None
|
||||||
with loaded_model as model:
|
assert loaded_model.config.key == key
|
||||||
assert isinstance(model, TextualInversionModelRaw)
|
with loaded_model as model:
|
||||||
|
assert isinstance(model, TextualInversionModelRaw)
|
||||||
|
|
||||||
config = mm2_model_manager.store.get_model(key)
|
config = mm2_model_manager.store.get_model(key)
|
||||||
loaded_model_2 = mm2_model_manager.load.load_model(config)
|
loaded_model_2 = mm2_model_manager.load.load_model(config)
|
||||||
|
|
||||||
assert loaded_model.config.key == loaded_model_2.config.key
|
assert loaded_model.config.key == loaded_model_2.config.key
|
||||||
|
|||||||
@@ -89,11 +89,10 @@ def mm2_download_queue(mm2_session: Session) -> DownloadQueueServiceBase:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mm2_loader(mm2_app_config: InvokeAIAppConfig, mm2_record_store: ModelRecordServiceBase) -> ModelLoadServiceBase:
|
def mm2_loader(mm2_app_config: InvokeAIAppConfig) -> ModelLoadServiceBase:
|
||||||
ram_cache = ModelCache(
|
ram_cache = ModelCache(
|
||||||
logger=InvokeAILogger.get_logger(),
|
logger=InvokeAILogger.get_logger(),
|
||||||
max_cache_size=mm2_app_config.ram,
|
max_cache_size=mm2_app_config.ram,
|
||||||
max_vram_cache_size=mm2_app_config.vram,
|
|
||||||
)
|
)
|
||||||
convert_cache = ModelConvertCache(mm2_app_config.convert_cache_path)
|
convert_cache = ModelConvertCache(mm2_app_config.convert_cache_path)
|
||||||
return ModelLoadService(
|
return ModelLoadService(
|
||||||
|
|||||||
@@ -8,7 +8,9 @@ import pytest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from invokeai.app.services.config import get_config
|
from invokeai.app.services.config import get_config
|
||||||
|
from invokeai.backend.model_manager.load import ModelCache
|
||||||
from invokeai.backend.util.devices import TorchDevice, choose_precision, choose_torch_device, torch_dtype
|
from invokeai.backend.util.devices import TorchDevice, choose_precision, choose_torch_device, torch_dtype
|
||||||
|
from tests.backend.model_manager.model_manager_fixtures import * # noqa F403
|
||||||
|
|
||||||
devices = ["cpu", "cuda:0", "cuda:1", "mps"]
|
devices = ["cpu", "cuda:0", "cuda:1", "mps"]
|
||||||
device_types_cpu = [("cpu", torch.float32), ("cuda:0", torch.float32), ("mps", torch.float32)]
|
device_types_cpu = [("cpu", torch.float32), ("cuda:0", torch.float32), ("mps", torch.float32)]
|
||||||
@@ -20,6 +22,7 @@ device_types_mps = [("cpu", torch.float32), ("cuda:0", torch.float32), ("mps", t
|
|||||||
def test_device_choice(device_name):
|
def test_device_choice(device_name):
|
||||||
config = get_config()
|
config = get_config()
|
||||||
config.device = device_name
|
config.device = device_name
|
||||||
|
TorchDevice.set_model_cache(None) # disable dynamic selection of GPU device
|
||||||
torch_device = TorchDevice.choose_torch_device()
|
torch_device = TorchDevice.choose_torch_device()
|
||||||
assert torch_device == torch.device(device_name)
|
assert torch_device == torch.device(device_name)
|
||||||
|
|
||||||
@@ -130,3 +133,32 @@ def test_legacy_precision_name():
|
|||||||
assert "float16" == choose_precision(torch.device("cuda"))
|
assert "float16" == choose_precision(torch.device("cuda"))
|
||||||
assert "float16" == choose_precision(torch.device("mps"))
|
assert "float16" == choose_precision(torch.device("mps"))
|
||||||
assert "float32" == choose_precision(torch.device("cpu"))
|
assert "float32" == choose_precision(torch.device("cpu"))
|
||||||
|
|
||||||
|
|
||||||
|
def test_multi_device_support_1():
|
||||||
|
config = get_config()
|
||||||
|
config.devices = ["cuda:0", "cuda:1"]
|
||||||
|
assert TorchDevice.execution_devices() == {torch.device("cuda:0"), torch.device("cuda:1")}
|
||||||
|
|
||||||
|
|
||||||
|
def test_multi_device_support_2():
|
||||||
|
config = get_config()
|
||||||
|
config.devices = None
|
||||||
|
with (
|
||||||
|
patch("torch.cuda.device_count", return_value=3),
|
||||||
|
patch("torch.cuda.is_available", return_value=True),
|
||||||
|
):
|
||||||
|
assert TorchDevice.execution_devices() == {
|
||||||
|
torch.device("cuda:0"),
|
||||||
|
torch.device("cuda:1"),
|
||||||
|
torch.device("cuda:2"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_multi_device_support_3():
|
||||||
|
config = get_config()
|
||||||
|
config.devices = ["cuda:0", "cuda:1"]
|
||||||
|
cache = ModelCache()
|
||||||
|
with cache.reserve_execution_device() as gpu:
|
||||||
|
assert gpu in [torch.device(x) for x in config.devices]
|
||||||
|
assert TorchDevice.choose_torch_device() == gpu
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ from invokeai.app.services.config.config_default import InvokeAIAppConfig
|
|||||||
from invokeai.app.services.images.images_default import ImageService
|
from invokeai.app.services.images.images_default import ImageService
|
||||||
from invokeai.app.services.invocation_cache.invocation_cache_memory import MemoryInvocationCache
|
from invokeai.app.services.invocation_cache.invocation_cache_memory import MemoryInvocationCache
|
||||||
from invokeai.app.services.invocation_services import InvocationServices
|
from invokeai.app.services.invocation_services import InvocationServices
|
||||||
from invokeai.app.services.invocation_stats.invocation_stats_default import InvocationStatsService
|
|
||||||
from invokeai.app.services.invoker import Invoker
|
from invokeai.app.services.invoker import Invoker
|
||||||
from invokeai.backend.util.logging import InvokeAILogger
|
from invokeai.backend.util.logging import InvokeAILogger
|
||||||
from tests.backend.model_manager.model_manager_fixtures import * # noqa: F403
|
from tests.backend.model_manager.model_manager_fixtures import * # noqa: F403
|
||||||
@@ -49,13 +48,13 @@ def mock_services() -> InvocationServices:
|
|||||||
model_manager=None, # type: ignore
|
model_manager=None, # type: ignore
|
||||||
download_queue=None, # type: ignore
|
download_queue=None, # type: ignore
|
||||||
names=None, # type: ignore
|
names=None, # type: ignore
|
||||||
performance_statistics=InvocationStatsService(),
|
|
||||||
session_processor=None, # type: ignore
|
session_processor=None, # type: ignore
|
||||||
session_queue=None, # type: ignore
|
session_queue=None, # type: ignore
|
||||||
urls=None, # type: ignore
|
urls=None, # type: ignore
|
||||||
workflow_records=None, # type: ignore
|
workflow_records=None, # type: ignore
|
||||||
tensors=None, # type: ignore
|
tensors=None, # type: ignore
|
||||||
conditioning=None, # type: ignore
|
conditioning=None, # type: ignore
|
||||||
|
performance_statistics=None, # type: ignore
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -92,7 +92,6 @@ def test_migrate_v3_config_from_file(tmp_path: Path, patch_rootdir: None):
|
|||||||
assert config.host == "192.168.1.1"
|
assert config.host == "192.168.1.1"
|
||||||
assert config.port == 8080
|
assert config.port == 8080
|
||||||
assert config.ram == 100
|
assert config.ram == 100
|
||||||
assert config.vram == 50
|
|
||||||
assert config.legacy_models_yaml_path == Path("/custom/models.yaml")
|
assert config.legacy_models_yaml_path == Path("/custom/models.yaml")
|
||||||
# This should be stripped out
|
# This should be stripped out
|
||||||
assert not hasattr(config, "esrgan")
|
assert not hasattr(config, "esrgan")
|
||||||
|
|||||||
Reference in New Issue
Block a user