merge cache setting api

Lincoln Stein
2024-06-23 12:43:58 -04:00
4 changed files with 119 additions and 18 deletions

View File

@@ -6,6 +6,7 @@ import pathlib
import shutil
import traceback
from copy import deepcopy
from enum import Enum
from typing import Any, Dict, List, Optional, Type
from fastapi import Body, Path, Query, Response, UploadFile
@@ -16,6 +17,7 @@ from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field
from starlette.exceptions import HTTPException
from typing_extensions import Annotated
from invokeai.app.services.config import get_config
from invokeai.app.services.model_images.model_images_common import ModelImageFileNotFoundException
from invokeai.app.services.model_install.model_install_common import ModelInstallJob
from invokeai.app.services.model_records import (
@@ -32,6 +34,7 @@ from invokeai.backend.model_manager.config import (
ModelType,
SubModelType,
)
from invokeai.backend.model_manager.load.model_cache.model_cache_base import CacheStats
from invokeai.backend.model_manager.metadata.fetch.huggingface import HuggingFaceMetadataFetch
from invokeai.backend.model_manager.metadata.metadata_base import ModelMetadataWithFiles, UnknownMetadataException
from invokeai.backend.model_manager.search import ModelSearch
@@ -53,6 +56,13 @@ class ModelsList(BaseModel):
    model_config = ConfigDict(use_enum_values=True)


class CacheType(str, Enum):
    """Cache type - one of vram or ram."""

    RAM = "RAM"
    VRAM = "VRAM"


def add_cover_image_to_model_config(config: AnyModelConfig, dependencies: Type[ApiDependencies]) -> AnyModelConfig:
    """Add a cover image URL to a model configuration."""
    cover_image = dependencies.invoker.services.model_images.get_url(config.key)
@@ -174,18 +184,6 @@ async def get_model_record(
        raise HTTPException(status_code=404, detail=str(e))
# @model_manager_router.get("/summary", operation_id="list_model_summary")
# async def list_model_summary(
# page: int = Query(default=0, description="The page to get"),
# per_page: int = Query(default=10, description="The number of models per page"),
# order_by: ModelRecordOrderBy = Query(default=ModelRecordOrderBy.Default, description="The attribute to order by"),
# ) -> PaginatedResults[ModelSummary]:
# """Gets a page of model summary data."""
# record_store = ApiDependencies.invoker.services.model_manager.store
# results: PaginatedResults[ModelSummary] = record_store.list_models(page=page, per_page=per_page, order_by=order_by)
# return results
class FoundModel(BaseModel):
    path: str = Field(description="Path to the model")
    is_installed: bool = Field(description="Whether or not the model is already installed")
@@ -816,3 +814,79 @@ async def get_starter_models() -> list[StarterModel]:
        model.dependencies = missing_deps

    return starter_models


@model_manager_router.get(
    "/model_cache",
    operation_id="get_cache_size",
    response_model=float,
    summary="Get maximum size of model manager RAM or VRAM cache.",
)
async def get_cache_size(cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM)) -> float:
    """Return the current RAM or VRAM cache size setting (in GB)."""
    cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
    if cache_type == CacheType.RAM:
        return cache.max_cache_size
    elif cache_type == CacheType.VRAM:
        return cache.max_vram_cache_size
    else:
        raise ValueError(f"Unexpected {cache_type=}.")


@model_manager_router.put(
    "/model_cache",
    operation_id="set_cache_size",
    response_model=float,
    summary="Set maximum size of model manager RAM or VRAM cache, optionally writing new value out to invokeai.yaml config file.",
)
async def set_cache_size(
    value: float = Query(description="The new value for the maximum cache size"),
    cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM),
    persist: bool = Query(description="Write new value out to invokeai.yaml", default=False),
) -> float:
    """Set the current RAM or VRAM cache size setting (in GB)."""
    cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
    app_config = get_config()
    # Remember the current settings so a failed config write can be rolled back.
    vram_bak, ram_bak = (app_config.vram, app_config.ram)
    if cache_type == CacheType.RAM:
        cache.max_cache_size = value
        app_config.ram = value
    elif cache_type == CacheType.VRAM:
        cache.max_vram_cache_size = value
        app_config.vram = value
    else:
        raise ValueError(f"Unexpected {cache_type=}.")
    if persist:
        config_path = app_config.config_file_path
        new_config_path = config_path.with_suffix(".yaml.new")
        backup_config_path = config_path.with_suffix(".yaml.bak")
        shutil.copy(config_path, backup_config_path)
        try:
            # Write to a temporary file first, then swap it into place.
            app_config.write_file(new_config_path)
            shutil.move(new_config_path, config_path)
        except Exception as e:
            # Restore the backed-up file and the in-memory settings on failure.
            shutil.move(backup_config_path, config_path)
            app_config.vram = vram_bak
            app_config.ram = ram_bak
            raise RuntimeError(f"Failed to save configuration to {config_path}: {e}") from e
    if cache_type == CacheType.VRAM:
        return cache.max_vram_cache_size
    elif cache_type == CacheType.RAM:
        return cache.max_cache_size
    else:
        raise ValueError(f"Unexpected {cache_type=}.")


@model_manager_router.get(
    "/stats",
    operation_id="get_stats",
    response_model=Optional[CacheStats],
    summary="Get model manager RAM cache performance statistics.",
)
async def get_stats() -> Optional[CacheStats]:
    """Return performance statistics on the model manager's RAM cache. Will return null if no models have been loaded."""
    return ApiDependencies.invoker.services.model_manager.load.ram_cache.stats
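
The three routes above can be exercised from any HTTP client. Below is a rough client-side sketch (not part of this commit); the host/port and the /api/v2/models prefix are assumptions about how model_manager_router is mounted.

import requests

BASE = "http://127.0.0.1:9090/api/v2/models"  # assumed mount point of model_manager_router

# Read the current VRAM cache cap (GB).
vram_cap = requests.get(f"{BASE}/model_cache", params={"cache_type": "VRAM"}).json()

# Raise the RAM cache cap to 12 GB and persist the new value to invokeai.yaml.
new_cap = requests.put(
    f"{BASE}/model_cache",
    params={"value": 12.0, "cache_type": "RAM", "persist": True},
).json()

# Fetch cache performance statistics (null until a model has been loaded).
stats = requests.get(f"{BASE}/stats").json()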

View File

@@ -79,8 +79,10 @@ class ModelLoader(ModelLoaderBase):
    def _convert_and_load(
        self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None
    ) -> ModelLockerBase:
        stats_name = ":".join([config.base, config.type, config.name, (submodel_type or "")])
        try:
            return self._ram_cache.get(config.key, submodel_type)
            return self._ram_cache.get(config.key, submodel_type, stats_name=stats_name)
        except IndexError:
            pass
@@ -100,7 +102,7 @@ class ModelLoader(ModelLoaderBase):
        return self._ram_cache.get(
            key=config.key,
            submodel_type=submodel_type,
            stats_name=":".join([config.base, config.type, config.name, (submodel_type or "")]),
            stats_name=stats_name,
        )

    def get_size_fs(
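
For reference, the stats_name computed in _convert_and_load keys cache statistics per model and submodel. A hypothetical example of the resulting key (the model values are made up; the config fields are str-based enums, so the join yields a plain string):

stats_name = ":".join(["sdxl", "main", "Juggernaut-XL", "unet"])
assert stats_name == "sdxl:main:Juggernaut-XL:unet"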

View File

@@ -143,7 +143,24 @@ class ModelCacheBase(ABC, Generic[T]):
    @property
    @abstractmethod
    def max_cache_size(self) -> float:
        """Return true if the cache is configured to lazily offload models in VRAM."""
        """Return the maximum size the RAM cache can grow to."""
        pass

    @max_cache_size.setter
    @abstractmethod
    def max_cache_size(self, value: float) -> None:
        """Set the cap on the RAM cache size."""

    @property
    @abstractmethod
    def max_vram_cache_size(self) -> float:
        """Return the maximum size the VRAM cache can grow to."""
        pass

    @max_vram_cache_size.setter
    @abstractmethod
    def max_vram_cache_size(self, value: float) -> None:
        """Set the maximum size the VRAM cache can grow to."""
        pass

    @property

View File

@@ -61,8 +61,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        storage_device: torch.device = torch.device("cpu"),
        execution_devices: Optional[Set[torch.device]] = None,
        precision: torch.dtype = torch.float16,
        sequential_offload: bool = False,
        sha_chunksize: int = 16777216,
        lazy_offloading: bool = True,
        log_memory_usage: bool = False,
        logger: Optional[Logger] = None,
    ):
@@ -72,7 +71,6 @@ class ModelCache(ModelCacheBase[AnyModel]):
        :param max_cache_size: Maximum size of the RAM cache [6.0 GB]
        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
        :param precision: Precision for loaded models [torch.float16]
        :param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially
        :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
            operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
            snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
@@ -182,6 +180,16 @@ class ModelCache(ModelCacheBase[AnyModel]):
"""Set the cap on cache size."""
self._max_cache_size = value
@property
def max_vram_cache_size(self) -> float:
"""Return the cap on vram cache size."""
return self._max_vram_cache_size
@max_vram_cache_size.setter
def max_vram_cache_size(self, value: float) -> None:
"""Set the cap on vram cache size."""
self._max_vram_cache_size = value
@property
def stats(self) -> Optional[CacheStats]:
"""Return collected CacheStats object."""