use model_class.from_single_file() instead of converting; works, but performance is poor

This commit is contained in:
Lincoln Stein
2024-06-07 15:34:14 -04:00
parent 0dbec3ad8b
commit 067b805044
4 changed files with 189 additions and 114 deletions

View File

@@ -84,12 +84,15 @@ class ModelLoader(ModelLoaderBase):
except IndexError:
pass
cache_path: Path = self._convert_cache.cache_path(config.key)
if self._needs_conversion(config, model_path, cache_path):
loaded_model = self._do_convert(config, model_path, cache_path, submodel_type)
else:
config.path = str(cache_path) if cache_path.exists() else str(self._get_model_path(config))
loaded_model = self._load_model(config, submodel_type)
config.path = str(self._get_model_path(config))
loaded_model = self._load_model(config, submodel_type)
# cache_path: Path = self._convert_cache.cache_path(config.key)
# if self._needs_conversion(config, model_path, cache_path):
# loaded_model = self._do_convert(config, model_path, cache_path, submodel_type)
# else:
# config.path = str(cache_path) if cache_path.exists() else str(self._get_model_path(config))
# loaded_model = self._load_model(config, submodel_type)
self._ram_cache.put(
config.key,

View File

@@ -3,7 +3,7 @@
from pathlib import Path
from typing import Optional
from diffusers import ControlNetModel
from invokeai.backend.model_manager import (
AnyModel,
AnyModelConfig,
@@ -11,8 +11,7 @@ from invokeai.backend.model_manager import (
ModelFormat,
ModelType,
)
from invokeai.backend.model_manager.config import CheckpointConfigBase
from invokeai.backend.model_manager.convert_ckpt_to_diffusers import convert_controlnet_to_diffusers
from invokeai.backend.model_manager.config import SubModelType, ControlNetCheckpointConfig
from .. import ModelLoaderRegistry
from .generic_diffusers import GenericDiffusersLoader
@@ -23,36 +22,46 @@ from .generic_diffusers import GenericDiffusersLoader
class ControlNetLoader(GenericDiffusersLoader):
"""Class to load ControlNet models."""
def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
if not isinstance(config, CheckpointConfigBase):
return False
elif (
dest_path.exists()
and (dest_path / "config.json").stat().st_mtime >= (config.converted_at or 0.0)
and (dest_path / "config.json").stat().st_mtime >= model_path.stat().st_mtime
):
return False
def _load_model(
self,
config: AnyModelConfig,
submodel_type: Optional[SubModelType] = None,
) -> AnyModel:
if isinstance(config, ControlNetCheckpointConfig):
return ControlNetModel.from_single_file(config.path, config=self._app_config.legacy_conf_path / config.config_path)
else:
return True
return super()._load_model(config, submodel_type)
def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
assert isinstance(config, CheckpointConfigBase)
image_size = (
512
if config.base == BaseModelType.StableDiffusion1
else 768
if config.base == BaseModelType.StableDiffusion2
else 1024
)
# def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
# if not isinstance(config, CheckpointConfigBase):
# return False
# elif (
# dest_path.exists()
# and (dest_path / "config.json").stat().st_mtime >= (config.converted_at or 0.0)
# and (dest_path / "config.json").stat().st_mtime >= model_path.stat().st_mtime
# ):
# return False
# else:
# return True
self._logger.info(f"Converting {model_path} to diffusers format")
with open(self._app_config.legacy_conf_path / config.config_path, "r") as config_stream:
result = convert_controlnet_to_diffusers(
model_path,
output_path,
original_config_file=config_stream,
image_size=image_size,
precision=self._torch_dtype,
from_safetensors=model_path.suffix == ".safetensors",
)
return result
# def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
# assert isinstance(config, CheckpointConfigBase)
# image_size = (
# 512
# if config.base == BaseModelType.StableDiffusion1
# else 768
# if config.base == BaseModelType.StableDiffusion2
# else 1024
# )
# self._logger.info(f"Converting {model_path} to diffusers format")
# with open(self._app_config.legacy_conf_path / config.config_path, "r") as config_stream:
# result = convert_controlnet_to_diffusers(
# model_path,
# output_path,
# original_config_file=config_stream,
# image_size=image_size,
# precision=self._torch_dtype,
# from_safetensors=model_path.suffix == ".safetensors",
# )
# return result

View File

@@ -3,13 +3,21 @@
from pathlib import Path
from typing import Optional
from diffusers import (
StableDiffusionPipeline,
StableDiffusionInpaintPipeline,
StableDiffusionXLPipeline,
StableDiffusionXLInpaintPipeline,
)
from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
from invokeai.backend.model_manager import (
AnyModel,
AnyModelConfig,
BaseModelType,
ModelFormat,
ModelType,
ModelVariantType,
SchedulerPredictionType,
SubModelType,
)
@@ -50,6 +58,10 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
) -> AnyModel:
if not submodel_type is not None:
raise Exception("A submodel type must be provided when loading main pipelines.")
if isinstance(config, CheckpointConfigBase):
return self._load_from_singlefile(config, submodel_type)
model_path = Path(config.path)
load_class = self.get_hf_load_class(model_path, submodel_type)
repo_variant = config.repo_variant if isinstance(config, DiffusersConfigBase) else None
@@ -71,46 +83,86 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
return result
def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
if not isinstance(config, CheckpointConfigBase):
return False
elif (
dest_path.exists()
and (dest_path / "model_index.json").stat().st_mtime >= (config.converted_at or 0.0)
and (dest_path / "model_index.json").stat().st_mtime >= model_path.stat().st_mtime
):
return False
else:
return True
def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
def _load_from_singlefile(
self,
config: AnyModelConfig,
submodel_type: SubModelType,
) -> AnyModel:
load_classes = {
BaseModelType.StableDiffusion1: {
ModelVariantType.Normal: StableDiffusionPipeline,
ModelVariantType.Inpaint: StableDiffusionInpaintPipeline,
},
BaseModelType.StableDiffusion2: {
ModelVariantType.Normal: StableDiffusionPipeline,
ModelVariantType.Inpaint: StableDiffusionInpaintPipeline,
},
BaseModelType.StableDiffusionXL: {
ModelVariantType.Normal: StableDiffusionXLPipeline,
ModelVariantType.Inpaint: StableDiffusionXLInpaintPipeline,
}
}
assert isinstance(config, MainCheckpointConfig)
base = config.base
try:
load_class = load_classes[config.base][config.variant]
except KeyError as e:
raise Exception(f'No diffusers pipeline known for base={config.base}, variant={config.variant}') from e
print(f'DEBUG: load_class={load_class}')
original_config_file=self._app_config.legacy_conf_path / config.config_path # should try without using this...
pipeline = load_class.from_single_file(config.path, config=original_config_file)
prediction_type = config.prediction_type.value
upcast_attention = config.upcast_attention
image_size = (
1024
if base == BaseModelType.StableDiffusionXL
else 768
if config.prediction_type == SchedulerPredictionType.VPrediction and base == BaseModelType.StableDiffusion2
else 512
)
# Proactively load the various submodels into the RAM cache so that we don't have to re-convert
# the entire pipeline every time a new submodel is needed.
for subtype in SubModelType:
if subtype == submodel_type:
continue
if submodel := getattr(pipeline, subtype.value, None):
self._ram_cache.put(
config.key, submodel_type=subtype, model=submodel, size=calc_model_size_by_data(submodel)
)
return getattr(pipeline, submodel_type.value)
self._logger.info(f"Converting {model_path} to diffusers format")
loaded_model = convert_ckpt_to_diffusers(
model_path,
output_path,
model_type=self.model_base_to_model_type[base],
original_config_file=self._app_config.legacy_conf_path / config.config_path,
extract_ema=True,
from_safetensors=model_path.suffix == ".safetensors",
precision=self._torch_dtype,
prediction_type=prediction_type,
image_size=image_size,
upcast_attention=upcast_attention,
load_safety_checker=False,
num_in_channels=VARIANT_TO_IN_CHANNEL_MAP[config.variant],
)
return loaded_model
# def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
# if not isinstance(config, CheckpointConfigBase):
# return False
# elif (
# dest_path.exists()
# and (dest_path / "model_index.json").stat().st_mtime >= (config.converted_at or 0.0)
# and (dest_path / "model_index.json").stat().st_mtime >= model_path.stat().st_mtime
# ):
# return False
# else:
# return True
# def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
# assert isinstance(config, MainCheckpointConfig)
# base = config.base
# prediction_type = config.prediction_type.value
# upcast_attention = config.upcast_attention
# image_size = (
# 1024
# if base == BaseModelType.StableDiffusionXL
# else 768
# if config.prediction_type == SchedulerPredictionType.VPrediction and base == BaseModelType.StableDiffusion2
# else 512
# )
# self._logger.info(f"Converting {model_path} to diffusers format")
# loaded_model = convert_ckpt_to_diffusers(
# model_path,
# output_path,
# model_type=self.model_base_to_model_type[base],
# original_config_file=self._app_config.legacy_conf_path / config.config_path,
# extract_ema=True,
# from_safetensors=model_path.suffix == ".safetensors",
# precision=self._torch_dtype,
# prediction_type=prediction_type,
# image_size=image_size,
# upcast_attention=upcast_attention,
# load_safety_checker=False,
# num_in_channels=VARIANT_TO_IN_CHANNEL_MAP[config.variant],
# )
# return loaded_model

View File

@@ -1,6 +1,7 @@
# Copyright (c) 2024, Lincoln D. Stein and the InvokeAI Development Team
"""Class for VAE model loading in InvokeAI."""
from diffusers import AutoencoderKL
from pathlib import Path
from typing import Optional
@@ -14,7 +15,7 @@ from invokeai.backend.model_manager import (
ModelFormat,
ModelType,
)
from invokeai.backend.model_manager.config import AnyModel, CheckpointConfigBase
from invokeai.backend.model_manager.config import AnyModel, CheckpointConfigBase, SubModelType, VAECheckpointConfig
from invokeai.backend.model_manager.convert_ckpt_to_diffusers import convert_ldm_vae_to_diffusers
from .. import ModelLoaderRegistry
@@ -27,43 +28,53 @@ from .generic_diffusers import GenericDiffusersLoader
class VAELoader(GenericDiffusersLoader):
"""Class to load VAE models."""
def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
if not isinstance(config, CheckpointConfigBase):
return False
elif (
dest_path.exists()
and (dest_path / "config.json").stat().st_mtime >= (config.converted_at or 0.0)
and (dest_path / "config.json").stat().st_mtime >= model_path.stat().st_mtime
):
return False
def _load_model(
self,
config: AnyModelConfig,
submodel_type: Optional[SubModelType] = None,
) -> AnyModel:
if isinstance(config, VAECheckpointConfig):
return AutoencoderKL.from_single_file(config.path, config=self._app_config.legacy_conf_path / config.config_path)
else:
return True
return super()._load_model(config, submodel_type)
def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
# TODO(MM2): check whether sdxl VAE models convert.
if config.base not in {BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2}:
raise Exception(f"VAE conversion not supported for model type: {config.base}")
else:
assert isinstance(config, CheckpointConfigBase)
config_file = self._app_config.legacy_conf_path / config.config_path
# def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
# if not isinstance(config, CheckpointConfigBase):
# return False
# elif (
# dest_path.exists()
# and (dest_path / "config.json").stat().st_mtime >= (config.converted_at or 0.0)
# and (dest_path / "config.json").stat().st_mtime >= model_path.stat().st_mtime
# ):
# return False
# else:
# return True
if model_path.suffix == ".safetensors":
checkpoint = safetensors_load_file(model_path, device="cpu")
else:
checkpoint = torch.load(model_path, map_location="cpu")
# def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
# # TODO(MM2): check whether sdxl VAE models convert.
# if config.base not in {BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2}:
# raise Exception(f"VAE conversion not supported for model type: {config.base}")
# else:
# assert isinstance(config, CheckpointConfigBase)
# config_file = self._app_config.legacy_conf_path / config.config_path
# sometimes weights are hidden under "state_dict", and sometimes not
if "state_dict" in checkpoint:
checkpoint = checkpoint["state_dict"]
# if model_path.suffix == ".safetensors":
# checkpoint = safetensors_load_file(model_path, device="cpu")
# else:
# checkpoint = torch.load(model_path, map_location="cpu")
ckpt_config = OmegaConf.load(config_file)
assert isinstance(ckpt_config, DictConfig)
self._logger.info(f"Converting {model_path} to diffusers format")
vae_model = convert_ldm_vae_to_diffusers(
checkpoint=checkpoint,
vae_config=ckpt_config,
image_size=512,
precision=self._torch_dtype,
dump_path=output_path,
)
return vae_model
# # sometimes weights are hidden under "state_dict", and sometimes not
# if "state_dict" in checkpoint:
# checkpoint = checkpoint["state_dict"]
# ckpt_config = OmegaConf.load(config_file)
# assert isinstance(ckpt_config, DictConfig)
# self._logger.info(f"Converting {model_path} to diffusers format")
# vae_model = convert_ldm_vae_to_diffusers(
# checkpoint=checkpoint,
# vae_config=ckpt_config,
# image_size=512,
# precision=self._torch_dtype,
# dump_path=output_path,
# )
# return vae_model