From 4c5bad63521128955d94243324cbdc42d52b2da1 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 19 Jun 2024 21:35:50 -0400 Subject: [PATCH 1/3] [MM] add API routes for getting & setting MM cache sizes, and retrieving MM stats --- invokeai/app/api/routers/model_manager.py | 80 ++++++++++++++++--- .../model_manager/load/load_default.py | 6 +- .../load/model_cache/model_cache_base.py | 19 ++++- .../load/model_cache/model_cache_default.py | 13 ++- 4 files changed, 100 insertions(+), 18 deletions(-) diff --git a/invokeai/app/api/routers/model_manager.py b/invokeai/app/api/routers/model_manager.py index 99f00423c6..1730d383f0 100644 --- a/invokeai/app/api/routers/model_manager.py +++ b/invokeai/app/api/routers/model_manager.py @@ -6,6 +6,7 @@ import pathlib import shutil import traceback from copy import deepcopy +from enum import Enum from typing import Any, Dict, List, Optional, Type from fastapi import Body, Path, Query, Response, UploadFile @@ -16,6 +17,7 @@ from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field from starlette.exceptions import HTTPException from typing_extensions import Annotated +from invokeai.app.services.config import get_config from invokeai.app.services.model_images.model_images_common import ModelImageFileNotFoundException from invokeai.app.services.model_install.model_install_common import ModelInstallJob from invokeai.app.services.model_records import ( @@ -32,6 +34,7 @@ from invokeai.backend.model_manager.config import ( ModelType, SubModelType, ) +from invokeai.backend.model_manager.load.model_cache.model_cache_base import CacheStats from invokeai.backend.model_manager.metadata.fetch.huggingface import HuggingFaceMetadataFetch from invokeai.backend.model_manager.metadata.metadata_base import ModelMetadataWithFiles, UnknownMetadataException from invokeai.backend.model_manager.search import ModelSearch @@ -53,6 +56,13 @@ class ModelsList(BaseModel): model_config = ConfigDict(use_enum_values=True) +class CacheType(str, Enum): + 
"""Cache type - one of vram or ram.""" + + RAM = "RAM" + VRAM = "VRAM" + + def add_cover_image_to_model_config(config: AnyModelConfig, dependencies: Type[ApiDependencies]) -> AnyModelConfig: """Add a cover image URL to a model configuration.""" cover_image = dependencies.invoker.services.model_images.get_url(config.key) @@ -174,18 +184,6 @@ async def get_model_record( raise HTTPException(status_code=404, detail=str(e)) -# @model_manager_router.get("/summary", operation_id="list_model_summary") -# async def list_model_summary( -# page: int = Query(default=0, description="The page to get"), -# per_page: int = Query(default=10, description="The number of models per page"), -# order_by: ModelRecordOrderBy = Query(default=ModelRecordOrderBy.Default, description="The attribute to order by"), -# ) -> PaginatedResults[ModelSummary]: -# """Gets a page of model summary data.""" -# record_store = ApiDependencies.invoker.services.model_manager.store -# results: PaginatedResults[ModelSummary] = record_store.list_models(page=page, per_page=per_page, order_by=order_by) -# return results - - class FoundModel(BaseModel): path: str = Field(description="Path to the model") is_installed: bool = Field(description="Whether or not the model is already installed") @@ -816,3 +814,61 @@ async def get_starter_models() -> list[StarterModel]: model.dependencies = missing_deps return starter_models + + +@model_manager_router.get( + "/model_cache", + operation_id="get_cache_size", + response_model=float, + summary="Get maximum size of model manager RAM or VRAM cache.", +) +async def get_cache_size(cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM)) -> float: + """Return the current RAM or VRAM cache size setting (in GB).""" + cache = ApiDependencies.invoker.services.model_manager.load.ram_cache + return cache.max_cache_size if cache_type == CacheType.RAM else cache.max_vram_cache_size + + +@model_manager_router.put( + "/model_cache", + 
operation_id="set_cache_size", + response_model=float, + summary="Set maximum size of model manager RAM or VRAM cache, optionally writing new value out to invokeai.yaml config file.", +) +async def set_cache_size( + value: float = Query(description="The new value for the maximum cache size"), + cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM), + persist: bool = Query(description="Write new value out to invokeai.yaml", default=False), +) -> float: + """Set the current RAM or VRAM cache size setting (in GB). .""" + cache = ApiDependencies.invoker.services.model_manager.load.ram_cache + app_config = get_config() + if cache_type == CacheType.RAM: + cache.max_cache_size = value + app_config.ram = value + elif cache_type == CacheType.VRAM: + cache.max_vram_cache_size = value + app_config.vram = value + + if persist: + config_path = app_config.config_file_path + print(f"DEBUG: config_path = {config_path}") + try: + shutil.copy(config_path, config_path.with_suffix(".yaml.bak")) + app_config.write_file(config_path) + except Exception as e: + shutil.move(config_path.with_suffix(".yaml.bak"), config_path) + raise RuntimeError(f"Failed to write modified configuration to {config_path}: {e}") from e + + return cache.max_vram_cache_size if cache_type == CacheType.VRAM else cache.max_cache_size + + +@model_manager_router.get( + "/stats", + operation_id="get_stats", + response_model=Optional[CacheStats], + summary="Get model manager RAM cache performance statistics.", +) +async def get_stats() -> Optional[CacheStats]: + """Return performance statistics on the model manager's RAM cache. 
Will return null if no models have been loaded.""" + + return ApiDependencies.invoker.services.model_manager.load.ram_cache.stats diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index a63cc66a86..13030395d2 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -79,8 +79,10 @@ class ModelLoader(ModelLoaderBase): def _convert_and_load( self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None ) -> ModelLockerBase: + stats_name = ":".join([config.base, config.type, config.name, (submodel_type or "")]) + try: - return self._ram_cache.get(config.key, submodel_type) + return self._ram_cache.get(config.key, submodel_type, stats_name=stats_name) except IndexError: pass @@ -100,7 +102,7 @@ class ModelLoader(ModelLoaderBase): return self._ram_cache.get( key=config.key, submodel_type=submodel_type, - stats_name=":".join([config.base, config.type, config.name, (submodel_type or "")]), + stats_name=stats_name, ) def get_size_fs( diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py index 012fd42d55..6d775cb632 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py @@ -128,7 +128,24 @@ class ModelCacheBase(ABC, Generic[T]): @property @abstractmethod def max_cache_size(self) -> float: - """Return true if the cache is configured to lazily offload models in VRAM.""" + """Return the maximum size the RAM cache can grow to.""" pass + + @max_cache_size.setter + @abstractmethod + def max_cache_size(self, value: float) -> None: + """Set the cap on RAM cache size.""" + + @property + @abstractmethod + def max_vram_cache_size(self) -> float: + """Return the maximum size the VRAM cache can grow to.""" + pass + + 
@max_vram_cache_size.setter + @abstractmethod + def max_vram_cache_size(self, value: float) -> None: + """Set the maximum size the VRAM cache can grow to.""" pass @abstractmethod diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py index d48e45426e..173acd6bb1 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py @@ -60,9 +60,7 @@ class ModelCache(ModelCacheBase[AnyModel]): execution_device: torch.device = torch.device("cuda"), storage_device: torch.device = torch.device("cpu"), precision: torch.dtype = torch.float16, - sequential_offload: bool = False, lazy_offloading: bool = True, - sha_chunksize: int = 16777216, log_memory_usage: bool = False, logger: Optional[Logger] = None, ): @@ -74,7 +72,6 @@ class ModelCache(ModelCacheBase[AnyModel]): :param storage_device: Torch device to save inactive model in [torch.device('cpu')] :param precision: Precision for loaded models [torch.float16] :param lazy_offloading: Keep model in VRAM until another model needs to be loaded - :param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). 
There is a time cost to capturing the memory snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's @@ -124,6 +121,16 @@ class ModelCache(ModelCacheBase[AnyModel]): """Set the cap on cache size.""" self._max_cache_size = value + @property + def max_vram_cache_size(self) -> float: + """Return the cap on vram cache size.""" + return self._max_vram_cache_size + + @max_vram_cache_size.setter + def max_vram_cache_size(self, value: float) -> None: + """Set the cap on vram cache size.""" + self._max_vram_cache_size = value + @property def stats(self) -> Optional[CacheStats]: """Return collected CacheStats object.""" From 787671c2c29b519b8e28feb1bb82df3170a688a2 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 21 Jun 2024 15:15:31 -0400 Subject: [PATCH 2/3] Update invokeai/app/api/routers/model_manager.py Co-authored-by: Ryan Dick --- invokeai/app/api/routers/model_manager.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/invokeai/app/api/routers/model_manager.py b/invokeai/app/api/routers/model_manager.py index 1730d383f0..ffb7e909e5 100644 --- a/invokeai/app/api/routers/model_manager.py +++ b/invokeai/app/api/routers/model_manager.py @@ -825,7 +825,12 @@ async def get_starter_models() -> list[StarterModel]: async def get_cache_size(cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM)) -> float: """Return the current RAM or VRAM cache size setting (in GB).""" cache = ApiDependencies.invoker.services.model_manager.load.ram_cache - return cache.max_cache_size if cache_type == CacheType.RAM else cache.max_vram_cache_size + if cache_type == CacheType.RAM: + return cache.max_cache_size + elif cache_type == CacheType.VRAM: + return cache.max_vram_cache_size + else: + raise ValueError(f"Unexpected {cache_type=}.") @model_manager_router.put( From 27195b167280966f75463c2b1ab9ad2ea0c489a8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 21 Jun 2024 15:36:37 
-0400 Subject: [PATCH 3/3] code cleanup after @ryand review --- invokeai/app/api/routers/model_manager.py | 25 +++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/invokeai/app/api/routers/model_manager.py b/invokeai/app/api/routers/model_manager.py index ffb7e909e5..a0c5ad8017 100644 --- a/invokeai/app/api/routers/model_manager.py +++ b/invokeai/app/api/routers/model_manager.py @@ -847,24 +847,37 @@ async def set_cache_size( """Set the current RAM or VRAM cache size setting (in GB). .""" cache = ApiDependencies.invoker.services.model_manager.load.ram_cache app_config = get_config() + vram_bak, ram_bak = (app_config.vram, app_config.ram) + if cache_type == CacheType.RAM: cache.max_cache_size = value app_config.ram = value elif cache_type == CacheType.VRAM: cache.max_vram_cache_size = value app_config.vram = value + else: + raise ValueError(f"Unexpected {cache_type=}.") if persist: config_path = app_config.config_file_path - print(f"DEBUG: config_path = {config_path}") + new_config_path = config_path.with_suffix(".yaml.new") + backup_config_path = config_path.with_suffix(".yaml.bak") + shutil.copy(config_path, backup_config_path) try: - shutil.copy(config_path, config_path.with_suffix(".yaml.bak")) - app_config.write_file(config_path) + app_config.write_file(new_config_path) + shutil.move(new_config_path, config_path) except Exception as e: - shutil.move(config_path.with_suffix(".yaml.bak"), config_path) - raise RuntimeError(f"Failed to write modified configuration to {config_path}: {e}") from e + shutil.move(backup_config_path, config_path) + app_config.vram = vram_bak + app_config.ram = ram_bak + raise RuntimeError(f"Failed to save configuration to {config_path}: {e}") from e - return cache.max_vram_cache_size if cache_type == CacheType.VRAM else cache.max_cache_size + if cache_type == CacheType.VRAM: + return cache.max_vram_cache_size + elif cache_type == CacheType.RAM: + return cache.max_cache_size + 
else: + raise ValueError(f"Unexpected {cache_type=}.") @model_manager_router.get(