Compare commits

...

3 Commits

Author SHA1 Message Date
Lincoln Stein
393451f684 fix misplaced model lock() call 2024-04-04 23:08:56 -04:00
Lincoln Stein
0c9332835d Merge branch 'main' into lstein/bugfix/vram-oom-errors 2024-04-04 22:34:44 -04:00
Lincoln Stein
edcea9c7ab adjust free vram calculation for models that will be removed by lazy offloading 2024-04-04 21:59:42 -04:00
3 changed files with 13 additions and 5 deletions

View File

@@ -80,6 +80,7 @@ class ModelManagerService(ModelManagerServiceBase):
ram_cache = ModelCache(
max_cache_size=app_config.ram,
max_vram_cache_size=app_config.vram,
lazy_offloading=app_config.lazy_offload,
logger=logger,
execution_device=execution_device,
)

View File

@@ -421,13 +421,20 @@ class ModelCache(ModelCacheBase[AnyModel]):
self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
def _free_vram(self, device: torch.device) -> int:
    """Estimate the VRAM that is, or can be made, available on *device*.

    Starts from the free-memory figure reported by
    ``torch.cuda.mem_get_info()`` and adds the ``size`` of every cached
    entry that is currently loaded but not locked.

    NOTE(review): presumably those entries are the ones lazy offloading
    can evict, and ``size`` is in the same units as the CUDA figure --
    confirm against the cache entry definition.

    :param device: Device to query; an un-indexed device is treated as index 0.
    :return: Free memory reported by CUDA plus the summed size of the
        loaded, unlocked cache entries.
    """
    # mem_get_info() needs an indexed device
    if device.index is None:
        indexed_device = torch.device(str(device), index=0)
    else:
        indexed_device = device

    available, _total = torch.cuda.mem_get_info(indexed_device)

    reclaimable_sizes = (
        entry.size
        for entry in self._cached_models.values()
        if entry.loaded and not entry.locked
    )
    # Seed the sum with the CUDA figure so additions happen in cache order.
    return sum(reclaimable_sizes, available)
def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
if target_device.type != "cuda":
return
vram_device = ( # mem_get_info() needs an indexed device
target_device if target_device.index is not None else torch.device(str(target_device), index=0)
)
free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
free_mem = self._free_vram(target_device)
if needed_size > free_mem:
needed_gb = round(needed_size / GIG, 2)
free_gb = round(free_mem / GIG, 2)

View File

@@ -34,7 +34,6 @@ class ModelLocker(ModelLockerBase):
# NOTE that the model has to have the to() method in order for this code to move it into GPU!
self._cache_entry.lock()
try:
if self._cache.lazy_offloading:
self._cache.offload_unlocked_models(self._cache_entry.size)
@@ -51,6 +50,7 @@ class ModelLocker(ModelLockerBase):
except Exception:
self._cache_entry.unlock()
raise
return self.model
def unlock(self) -> None: