fix misplaced model lock() call

Merge branch 'main' into lstein/bugfix/vram-oom-errors
adjust free vram calculation for models that will be removed by lazy offloading
2026-01-15 07:28:06 -05:00 · 2024-04-04 23:08:56 -04:00 · 2024-04-04 22:34:44 -04:00 · 2024-04-04 21:59:42 -04:00
3 changed files with 13 additions and 5 deletions
--- a/invokeai/app/services/model_manager/model_manager_default.py
+++ b/invokeai/app/services/model_manager/model_manager_default.py
@@ -80,6 +80,7 @@ class ModelManagerService(ModelManagerServiceBase):
        ram_cache = ModelCache(
            max_cache_size=app_config.ram,
            max_vram_cache_size=app_config.vram,
+            lazy_offloading=app_config.lazy_offload,
            logger=logger,
            execution_device=execution_device,
        )
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -421,13 +421,20 @@ class ModelCache(ModelCacheBase[AnyModel]):

        self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")

+    def _free_vram(self, device: torch.device) -> int:
+        vram_device = (  # mem_get_info() needs an indexed device
+            device if device.index is not None else torch.device(str(device), index=0)
+        )
+        free_mem, _ = torch.cuda.mem_get_info(vram_device)
+        for _, cache_entry in self._cached_models.items():
+            if cache_entry.loaded and not cache_entry.locked:
+                free_mem += cache_entry.size
+        return free_mem
+
    def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
        if target_device.type != "cuda":
            return
-        vram_device = (  # mem_get_info() needs an indexed device
-            target_device if target_device.index is not None else torch.device(str(target_device), index=0)
-        )
-        free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
+        free_mem = self._free_vram(target_device)
        if needed_size > free_mem:
            needed_gb = round(needed_size / GIG, 2)
            free_gb = round(free_mem / GIG, 2)
--- a/invokeai/backend/model_manager/load/model_cache/model_locker.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -34,7 +34,6 @@ class ModelLocker(ModelLockerBase):

        # NOTE that the model has to have the to() method in order for this code to move it into GPU!
        self._cache_entry.lock()
-
        try:
            if self._cache.lazy_offloading:
                self._cache.offload_unlocked_models(self._cache_entry.size)
@@ -51,6 +50,7 @@ class ModelLocker(ModelLockerBase):
        except Exception:
            self._cache_entry.unlock()
            raise
+
        return self.model

    def unlock(self) -> None:
Author	SHA1	Message	Date
Lincoln Stein	393451f684	fix misplaced model lock() call	2024-04-04 23:08:56 -04:00
Lincoln Stein	0c9332835d	Merge branch 'main' into lstein/bugfix/vram-oom-errors	2024-04-04 22:34:44 -04:00
Lincoln Stein	edcea9c7ab	adjust free vram calculation for models that will be removed by lazy offloading	2024-04-04 21:59:42 -04:00