Fix a couple of bugs to get basic vanilla partial model load working with the model cache.

This commit is contained in:
Ryan Dick
2024-12-06 00:50:58 +00:00
parent 050d4465e6
commit cb884ee567
2 changed files with 5 additions and 3 deletions

View File

@@ -75,7 +75,7 @@ class LoadedModelWithoutConfig:
     @property
     def model(self) -> AnyModel:
         """Return the model without locking it."""
-        return self._cache_record.model
+        return self._cache_record.cached_model.model
 
 
 class LoadedModel(LoadedModelWithoutConfig):

View File

@@ -219,9 +219,11 @@ class ModelCache:
             # self._print_cuda_stats()
         except torch.cuda.OutOfMemoryError:
             self._logger.warning("Insufficient GPU memory to load model. Aborting")
-            raise
-        finally:
             cache_entry.unlock()
+            raise
+        except Exception:
+            cache_entry.unlock()
+            raise
         # try:
         #     if self._lazy_offloading: