diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index c6ea8a686a..84af41b1c9 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -180,31 +180,33 @@ class TextToLatentsInvocation(BaseInvocation): def get_model(self, model_manager: ModelManager) -> StableDiffusionGeneratorPipeline: model_info = choose_model(model_manager, self.model) - model_name = model_info['model_name'] - model_hash = model_info['hash'] - model: StableDiffusionGeneratorPipeline = model_info['model'] - model.scheduler = get_scheduler( - model=model, - scheduler_name=self.scheduler - ) + model_name = model_info.name + model_hash = model_info.hash + model_ctx: StableDiffusionGeneratorPipeline = model_info.context + with model_ctx as model: + model.scheduler = get_scheduler( + model=model, + scheduler_name=self.scheduler + ) - if isinstance(model, DiffusionPipeline): - for component in [model.unet, model.vae]: - configure_model_padding(component, + if isinstance(model, DiffusionPipeline): + for component in [model.unet, model.vae]: + configure_model_padding(component, + self.seamless, + self.seamless_axes + ) + else: + configure_model_padding(model, self.seamless, self.seamless_axes ) - else: - configure_model_padding(model, - self.seamless, - self.seamless_axes - ) - return model + return model_ctx def get_conditioning_data(self, model: StableDiffusionGeneratorPipeline) -> ConditioningData: uc, c, extra_conditioning_info = get_uc_and_c_and_ec(self.prompt, model=model) + print(f'DEBUG: uc.dtype={uc.dtype}, c.dtype={c.dtype}') conditioning_data = ConditioningData( uc, c, @@ -230,18 +232,17 @@ class TextToLatentsInvocation(BaseInvocation): def step_callback(state: PipelineIntermediateState): self.dispatch_progress(context, source_node_id, state) - model = self.get_model(context.services.model_manager) - conditioning_data = self.get_conditioning_data(model) + with self.get_model(context.services.model_manager) as model: + conditioning_data = self.get_conditioning_data(model) - # TODO: Verify the noise is the right size - - result_latents, result_attention_map_saver = model.latents_from_embeddings( - latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)), - noise=noise, - num_inference_steps=self.steps, - conditioning_data=conditioning_data, - callback=step_callback - ) + # TODO: Verify the noise is the right size + result_latents, result_attention_map_saver = model.latents_from_embeddings( + latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)), + noise=noise, + num_inference_steps=self.steps, + conditioning_data=conditioning_data, + callback=step_callback + ) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() @@ -284,29 +285,29 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation): def step_callback(state: PipelineIntermediateState): self.dispatch_progress(context, source_node_id, state) - model = self.get_model(context.services.model_manager) - conditioning_data = self.get_conditioning_data(model) + with self.get_model(context.services.model_manager) as model: + conditioning_data = self.get_conditioning_data(model) - # TODO: Verify the noise is the right size + # TODO: Verify the noise is the right size - initial_latents = latent if self.strength < 1.0 else torch.zeros_like( - latent, device=model.device, dtype=latent.dtype - ) + initial_latents = latent if self.strength < 1.0 else torch.zeros_like( + latent, device=model.device, dtype=latent.dtype + ) - timesteps, _ = 
model.get_img2img_timesteps( - self.steps, - self.strength, - device=model.device, - ) + timesteps, _ = model.get_img2img_timesteps( + self.steps, + self.strength, + device=model.device, + ) - result_latents, result_attention_map_saver = model.latents_from_embeddings( - latents=initial_latents, - timesteps=timesteps, - noise=noise, - num_inference_steps=self.steps, - conditioning_data=conditioning_data, - callback=step_callback - ) + result_latents, result_attention_map_saver = model.latents_from_embeddings( + latents=initial_latents, + timesteps=timesteps, + noise=noise, + num_inference_steps=self.steps, + conditioning_data=conditioning_data, + callback=step_callback + ) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() diff --git a/invokeai/app/invocations/util/choose_model.py b/invokeai/app/invocations/util/choose_model.py index cd03ce87a8..0e888d7f43 100644 --- a/invokeai/app/invocations/util/choose_model.py +++ b/invokeai/app/invocations/util/choose_model.py @@ -7,7 +7,7 @@ def choose_model(model_manager: ModelManager, model_name: str): if model_manager.valid_model(model_name): model = model_manager.get_model(model_name) else: - model = model_manager.get_model() - logger.warning(f"{model_name}' is not a valid model name. Using default model \'{model['model_name']}\' instead.") + model = model_manager.get_model(model_manager.default_model()) + logger.warning(f"'{model_name}' is not a valid model name. Using default model \'{model.name}\' instead.") return model diff --git a/invokeai/app/services/model_manager_initializer.py b/invokeai/app/services/model_manager_initializer.py index 2b1aac1f36..c7924e797d 100644 --- a/invokeai/app/services/model_manager_initializer.py +++ b/invokeai/app/services/model_manager_initializer.py @@ -47,22 +47,21 @@ def get_model_manager(config: Args, logger: types.ModuleType) -> ModelManager: else: embedding_path = None - # migrate legacy models - ModelManager.migrate_models() - # creating the model manager try: device = torch.device(choose_torch_device()) - precision = 'float16' if config.precision=='float16' \ - else 'float32' if config.precision=='float32' \ - else choose_precision(device) + if config.precision=="auto": + precision = choose_precision(device) + dtype = torch.float32 if precision=='float32' \ + else torch.float16 model_manager = ModelManager( - OmegaConf.load(config.conf), - precision=precision, + config.conf, + precision=dtype, device_type=device, max_loaded_models=config.max_loaded_models, - embedding_path = Path(embedding_path), +# temporarily disabled until model manager stabilizes +# embedding_path = Path(embedding_path), logger = logger, ) except (FileNotFoundError, TypeError, AssertionError) as e: diff --git a/invokeai/backend/__init__.py b/invokeai/backend/__init__.py index 06066dd6b1..dc2eeca67a 100644 --- a/invokeai/backend/__init__.py +++ b/invokeai/backend/__init__.py @@ -10,7 +10,7 @@ from .generator import ( Img2Img, Inpaint ) -from .model_management import ModelManager +from .model_management import ModelManager, ModelCache, ModelStatus, SDModelType from .safety_checker import SafetyChecker from .args import Args from .globals import Globals diff --git a/invokeai/backend/generate.py b/invokeai/backend/generate.py index 4f3df60f1c..c7e2558db1 100644 --- a/invokeai/backend/generate.py +++ b/invokeai/backend/generate.py @@ -37,7 +37,7 @@ from .safety_checker import SafetyChecker from .prompting import get_uc_and_c_and_ec from .prompting.conditioning import log_tokenization 
from .stable_diffusion import HuggingFaceConceptsLibrary -from .util import choose_precision, choose_torch_device +from .util import choose_precision, choose_torch_device, torch_dtype def fix_func(orig): if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): @@ -50,7 +50,6 @@ def fix_func(orig): return new_func return orig - torch.rand = fix_func(torch.rand) torch.rand_like = fix_func(torch.rand_like) torch.randn = fix_func(torch.randn) @@ -156,7 +155,6 @@ class Generate: weights=None, config=None, ): - mconfig = OmegaConf.load(conf) self.height = None self.width = None self.model_manager = None @@ -171,7 +169,7 @@ class Generate: self.seamless_axes = {"x", "y"} self.hires_fix = False self.embedding_path = embedding_path - self.model = None # empty for now + self.model_context = None # empty for now self.model_hash = None self.sampler = None self.device = None @@ -219,12 +217,12 @@ class Generate: # model caching system for fast switching self.model_manager = ModelManager( - mconfig, + conf, self.device, - self.precision, + torch_dtype(self.device), max_loaded_models=max_loaded_models, sequential_offload=self.free_gpu_mem, - embedding_path=Path(self.embedding_path), +# embedding_path=Path(self.embedding_path), ) # don't accept invalid models fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME @@ -418,170 +416,171 @@ class Generate: with_variations = [] if with_variations is None else with_variations # will instantiate the model or return it from cache - model = self.set_model(self.model_name) + model_context = self.set_model(self.model_name) # self.width and self.height are set by set_model() # to the width and height of the image training set width = width or self.width height = height or self.height - if isinstance(model, DiffusionPipeline): - configure_model_padding(model.unet, seamless, seamless_axes) - configure_model_padding(model.vae, seamless, seamless_axes) - else: - configure_model_padding(model, seamless, seamless_axes) - - assert cfg_scale > 1.0, "CFG_Scale (-C) must be >1.0" - assert threshold >= 0.0, "--threshold must be >=0.0" - assert ( - 0.0 < strength <= 1.0 - ), "img2img and inpaint strength can only work with 0.0 < strength < 1.0" - assert ( - 0.0 <= variation_amount <= 1.0 - ), "-v --variation_amount must be in [0.0, 1.0]" - assert 0.0 <= perlin <= 1.0, "--perlin must be in [0.0, 1.0]" - assert (embiggen == None and embiggen_tiles == None) or ( - (embiggen != None or embiggen_tiles != None) and init_img != None - ), "Embiggen requires an init/input image to be specified" - - if len(with_variations) > 0 or variation_amount > 1.0: - assert seed is not None, "seed must be specified when using with_variations" - if variation_amount == 0.0: - assert ( - iterations == 1 - ), "when using --with_variations, multiple iterations are only possible when using --variation_amount" - assert all( - 0 <= weight <= 1 for _, weight in with_variations - ), f"variation weights must be in [0.0, 1.0]: got {[weight for _, weight in with_variations]}" - - width, height, _ = self._resolution_check(width, height, log=True) - assert ( - inpaint_replace >= 0.0 and inpaint_replace <= 1.0 - ), "inpaint_replace must be between 0.0 and 1.0" - - if sampler_name and (sampler_name != self.sampler_name): - self.sampler_name = sampler_name - self._set_scheduler() - - # apply the concepts library to the prompt - prompt = self.huggingface_concepts_library.replace_concepts_with_triggers( - prompt, - lambda concepts: self.load_huggingface_concepts(concepts), - 
self.model.textual_inversion_manager.get_all_trigger_strings(), - ) - - tic = time.time() - if self._has_cuda(): - torch.cuda.reset_peak_memory_stats() - - results = list() - - try: - uc, c, extra_conditioning_info = get_uc_and_c_and_ec( - prompt, - model=self.model, - skip_normalize_legacy_blend=skip_normalize, - log_tokens=self.log_tokenization, - ) - - init_image, mask_image = self._make_images( - init_img, - init_mask, - width, - height, - fit=fit, - text_mask=text_mask, - invert_mask=invert_mask, - force_outpaint=force_outpaint, - ) - - # TODO: Hacky selection of operation to perform. Needs to be refactored. - generator = self.select_generator( - init_image, mask_image, embiggen, hires_fix, force_outpaint - ) - - generator.set_variation(self.seed, variation_amount, with_variations) - generator.use_mps_noise = use_mps_noise - - results = generator.generate( - prompt, - iterations=iterations, - seed=self.seed, - sampler=self.sampler, - steps=steps, - cfg_scale=cfg_scale, - conditioning=(uc, c, extra_conditioning_info), - ddim_eta=ddim_eta, - image_callback=image_callback, # called after the final image is generated - step_callback=step_callback, # called after each intermediate image is generated - width=width, - height=height, - init_img=init_img, # embiggen needs to manipulate from the unmodified init_img - init_image=init_image, # notice that init_image is different from init_img - mask_image=mask_image, - strength=strength, - threshold=threshold, - perlin=perlin, - h_symmetry_time_pct=h_symmetry_time_pct, - v_symmetry_time_pct=v_symmetry_time_pct, - embiggen=embiggen, - embiggen_tiles=embiggen_tiles, - embiggen_strength=embiggen_strength, - inpaint_replace=inpaint_replace, - mask_blur_radius=mask_blur_radius, - safety_checker=self.safety_checker, - seam_size=seam_size, - seam_blur=seam_blur, - seam_strength=seam_strength, - seam_steps=seam_steps, - tile_size=tile_size, - infill_method=infill_method, - force_outpaint=force_outpaint, - inpaint_height=inpaint_height, - inpaint_width=inpaint_width, - enable_image_debugging=enable_image_debugging, - free_gpu_mem=self.free_gpu_mem, - clear_cuda_cache=self.clear_cuda_cache, - ) - - if init_color: - self.correct_colors( - image_list=results, - reference_image_path=init_color, - image_callback=image_callback, - ) - - if upscale is not None or facetool_strength > 0: - self.upscale_and_reconstruct( - results, - upscale=upscale, - upscale_denoise_str=upscale_denoise_str, - facetool=facetool, - strength=facetool_strength, - codeformer_fidelity=codeformer_fidelity, - save_original=save_original, - image_callback=image_callback, - ) - - except KeyboardInterrupt: - # Clear the CUDA cache on an exception - self.clear_cuda_cache() - - if catch_interrupts: - logger.warning("Interrupted** Partial results will be returned.") + with model_context as model: + if isinstance(model, DiffusionPipeline): + configure_model_padding(model.unet, seamless, seamless_axes) + configure_model_padding(model.vae, seamless, seamless_axes) else: - raise KeyboardInterrupt - except RuntimeError: - # Clear the CUDA cache on an exception - self.clear_cuda_cache() + configure_model_padding(model, seamless, seamless_axes) - print(traceback.format_exc(), file=sys.stderr) - logger.info("Could not generate image.") + assert cfg_scale > 1.0, "CFG_Scale (-C) must be >1.0" + assert threshold >= 0.0, "--threshold must be >=0.0" + assert ( + 0.0 < strength <= 1.0 + ), "img2img and inpaint strength can only work with 0.0 < strength < 1.0" + assert ( + 0.0 <= variation_amount <= 1.0 + ), 
"-v --variation_amount must be in [0.0, 1.0]" + assert 0.0 <= perlin <= 1.0, "--perlin must be in [0.0, 1.0]" + assert (embiggen == None and embiggen_tiles == None) or ( + (embiggen != None or embiggen_tiles != None) and init_img != None + ), "Embiggen requires an init/input image to be specified" - toc = time.time() - logger.info("Usage stats:") - logger.info(f"{len(results)} image(s) generated in "+"%4.2fs" % (toc - tic)) - self.print_cuda_stats() + if len(with_variations) > 0 or variation_amount > 1.0: + assert seed is not None, "seed must be specified when using with_variations" + if variation_amount == 0.0: + assert ( + iterations == 1 + ), "when using --with_variations, multiple iterations are only possible when using --variation_amount" + assert all( + 0 <= weight <= 1 for _, weight in with_variations + ), f"variation weights must be in [0.0, 1.0]: got {[weight for _, weight in with_variations]}" + + width, height, _ = self._resolution_check(width, height, log=True) + assert ( + inpaint_replace >= 0.0 and inpaint_replace <= 1.0 + ), "inpaint_replace must be between 0.0 and 1.0" + + if sampler_name and (sampler_name != self.sampler_name): + self.sampler_name = sampler_name + self._set_scheduler(model) + + # apply the concepts library to the prompt + prompt = self.huggingface_concepts_library.replace_concepts_with_triggers( + prompt, + lambda concepts: self.load_huggingface_concepts(concepts), + model.textual_inversion_manager.get_all_trigger_strings(), + ) + + tic = time.time() + if self._has_cuda(): + torch.cuda.reset_peak_memory_stats() + + results = list() + + try: + uc, c, extra_conditioning_info = get_uc_and_c_and_ec( + prompt, + model=model, + skip_normalize_legacy_blend=skip_normalize, + log_tokens=self.log_tokenization, + ) + + init_image, mask_image = self._make_images( + init_img, + init_mask, + width, + height, + fit=fit, + text_mask=text_mask, + invert_mask=invert_mask, + force_outpaint=force_outpaint, + ) + + # TODO: Hacky selection of operation to perform. Needs to be refactored. 
+ generator = self.select_generator( + init_image, mask_image, embiggen, hires_fix, force_outpaint + ) + + generator.set_variation(self.seed, variation_amount, with_variations) + generator.use_mps_noise = use_mps_noise + + results = generator.generate( + prompt, + iterations=iterations, + seed=self.seed, + sampler=self.sampler, + steps=steps, + cfg_scale=cfg_scale, + conditioning=(uc, c, extra_conditioning_info), + ddim_eta=ddim_eta, + image_callback=image_callback, # called after the final image is generated + step_callback=step_callback, # called after each intermediate image is generated + width=width, + height=height, + init_img=init_img, # embiggen needs to manipulate from the unmodified init_img + init_image=init_image, # notice that init_image is different from init_img + mask_image=mask_image, + strength=strength, + threshold=threshold, + perlin=perlin, + h_symmetry_time_pct=h_symmetry_time_pct, + v_symmetry_time_pct=v_symmetry_time_pct, + embiggen=embiggen, + embiggen_tiles=embiggen_tiles, + embiggen_strength=embiggen_strength, + inpaint_replace=inpaint_replace, + mask_blur_radius=mask_blur_radius, + safety_checker=self.safety_checker, + seam_size=seam_size, + seam_blur=seam_blur, + seam_strength=seam_strength, + seam_steps=seam_steps, + tile_size=tile_size, + infill_method=infill_method, + force_outpaint=force_outpaint, + inpaint_height=inpaint_height, + inpaint_width=inpaint_width, + enable_image_debugging=enable_image_debugging, + free_gpu_mem=self.free_gpu_mem, + clear_cuda_cache=self.clear_cuda_cache, + ) + + if init_color: + self.correct_colors( + image_list=results, + reference_image_path=init_color, + image_callback=image_callback, + ) + + if upscale is not None or facetool_strength > 0: + self.upscale_and_reconstruct( + results, + upscale=upscale, + upscale_denoise_str=upscale_denoise_str, + facetool=facetool, + strength=facetool_strength, + codeformer_fidelity=codeformer_fidelity, + save_original=save_original, + image_callback=image_callback, + ) + + except KeyboardInterrupt: + # Clear the CUDA cache on an exception + self.clear_cuda_cache() + + if catch_interrupts: + logger.warning("Interrupted** Partial results will be returned.") + else: + raise KeyboardInterrupt + except RuntimeError: + # Clear the CUDA cache on an exception + self.clear_cuda_cache() + + print(traceback.format_exc(), file=sys.stderr) + logger.info("Could not generate image.") + + toc = time.time() + logger.info("Usage stats:") + logger.info(f"{len(results)} image(s) generated in "+"%4.2fs" % (toc - tic)) + self.print_cuda_stats() return results def gather_cuda_stats(self): @@ -662,12 +661,13 @@ class Generate: # used by multiple postfixers # todo: cross-attention control - uc, c, extra_conditioning_info = get_uc_and_c_and_ec( - prompt, - model=self.model, - skip_normalize_legacy_blend=opt.skip_normalize, - log_tokens=log_tokenization, - ) + with self.model_context as model: + uc, c, extra_conditioning_info = get_uc_and_c_and_ec( + prompt, + model=model, + skip_normalize_legacy_blend=opt.skip_normalize, + log_tokens=log_tokenization, + ) if tool in ("gfpgan", "codeformer", "upscale"): if tool == "gfpgan": @@ -852,7 +852,8 @@ class Generate: cn = class_name module = importlib.import_module(mn) constructor = getattr(module, cn) - return constructor(self.model, self.precision) + with self.model_context as model: + return constructor(model, self.precision) def load_model(self): """ @@ -869,8 +870,8 @@ class Generate: If the model fails to load for some reason, will attempt to load the previously- loaded 
model (if any). If that fallback fails, will raise an AssertionError """ - if self.model_name == model_name and self.model is not None: - return self.model + if self.model_name == model_name and self.model_context is not None: + return self.model_context previous_model_name = self.model_name @@ -881,11 +882,9 @@ class Generate: f'** "{model_name}" is not a known model name. Cannot change.' ) - cache.print_vram_usage() - # have to get rid of all references to model in order # to free it from GPU memory - self.model = None + self.model_context = None self.sampler = None self.generators = {} gc.collect() @@ -902,29 +901,33 @@ class Generate: raise e model_name = previous_model_name - self.model = model_data["model"] - self.width = model_data["width"] - self.height = model_data["height"] - self.model_hash = model_data["hash"] + self.model_context = model_data.context + self.width = 512 + self.height = 512 + self.model_hash = model_data.hash # uncache generators so they pick up new models self.generators = {} set_seed(random.randrange(0, np.iinfo(np.uint32).max)) self.model_name = model_name - self._set_scheduler() # requires self.model_name to be set first - return self.model + with self.model_context as model: + self._set_scheduler(model) # requires self.model_name to be set first + return self.model_context def load_huggingface_concepts(self, concepts: list[str]): - self.model.textual_inversion_manager.load_huggingface_concepts(concepts) + with self.model_context as model: + model.textual_inversion_manager.load_huggingface_concepts(concepts) @property def huggingface_concepts_library(self) -> HuggingFaceConceptsLibrary: - return self.model.textual_inversion_manager.hf_concepts_library + with self.model_context as model: + return model.textual_inversion_manager.hf_concepts_library @property def embedding_trigger_strings(self) -> List[str]: - return self.model.textual_inversion_manager.get_all_trigger_strings() + with self.model_context as model: + return model.textual_inversion_manager.get_all_trigger_strings() def correct_colors(self, image_list, reference_image_path, image_callback=None): reference_image = Image.open(reference_image_path) @@ -1044,8 +1047,8 @@ class Generate: def is_legacy_model(self, model_name) -> bool: return self.model_manager.is_legacy(model_name) - def _set_scheduler(self): - default = self.model.scheduler + def _set_scheduler(self,model): + default = model.scheduler # See https://github.com/huggingface/diffusers/issues/277#issuecomment-1371428672 scheduler_map = dict( @@ -1069,7 +1072,7 @@ class Generate: msg = ( f"Setting Sampler to {self.sampler_name} ({sampler_class.__name__})" ) - self.sampler = sampler_class.from_config(self.model.scheduler.config) + self.sampler = sampler_class.from_config(model.scheduler.config) else: msg = ( f" Unsupported Sampler: {self.sampler_name} "+ diff --git a/invokeai/backend/generator/base.py b/invokeai/backend/generator/base.py index 8ad9dec026..1f0670bef3 100644 --- a/invokeai/backend/generator/base.py +++ b/invokeai/backend/generator/base.py @@ -123,51 +123,51 @@ class InvokeAIGenerator(metaclass=ABCMeta): generator_args.update(keyword_args) model_info = self.model_info - model_name = model_info['model_name'] - model:StableDiffusionGeneratorPipeline = model_info['model'] - model_hash = model_info['hash'] - scheduler: Scheduler = self.get_scheduler( - model=model, - scheduler_name=generator_args.get('scheduler') - ) - uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt,model=model) - gen_class = self._generator_class() - 
generator = gen_class(model, self.params.precision) - if self.params.variation_amount > 0: - generator.set_variation(generator_args.get('seed'), - generator_args.get('variation_amount'), - generator_args.get('with_variations') - ) + model_name = model_info.name + model_hash = model_info.hash + with model_info.context as model: + scheduler: Scheduler = self.get_scheduler( + model=model, + scheduler_name=generator_args.get('scheduler') + ) + uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt,model=model) + gen_class = self._generator_class() + generator = gen_class(model, self.params.precision) + if self.params.variation_amount > 0: + generator.set_variation(generator_args.get('seed'), + generator_args.get('variation_amount'), + generator_args.get('with_variations') + ) - if isinstance(model, DiffusionPipeline): - for component in [model.unet, model.vae]: - configure_model_padding(component, + if isinstance(model, DiffusionPipeline): + for component in [model.unet, model.vae]: + configure_model_padding(component, + generator_args.get('seamless',False), + generator_args.get('seamless_axes') + ) + else: + configure_model_padding(model, generator_args.get('seamless',False), generator_args.get('seamless_axes') ) - else: - configure_model_padding(model, - generator_args.get('seamless',False), - generator_args.get('seamless_axes') - ) - iteration_count = range(iterations) if iterations else itertools.count(start=0, step=1) - for i in iteration_count: - results = generator.generate(prompt, - conditioning=(uc, c, extra_conditioning_info), - step_callback=step_callback, - sampler=scheduler, - **generator_args, - ) - output = InvokeAIGeneratorOutput( - image=results[0][0], - seed=results[0][1], - attention_maps_images=results[0][2], - model_hash = model_hash, - params=Namespace(model_name=model_name,**generator_args), - ) - if callback: - callback(output) + iteration_count = range(iterations) if iterations else itertools.count(start=0, step=1) + for i in iteration_count: + results = generator.generate(prompt, + conditioning=(uc, c, extra_conditioning_info), + step_callback=step_callback, + sampler=scheduler, + **generator_args, + ) + output = InvokeAIGeneratorOutput( + image=results[0][0], + seed=results[0][1], + attention_maps_images=results[0][2], + model_hash = model_hash, + params=Namespace(model_name=model_name,**generator_args), + ) + if callback: + callback(output) yield output @classmethod @@ -275,7 +275,6 @@ class Embiggen(Txt2Img): from .embiggen import Embiggen return Embiggen - class Generator: downsampling_factor: int latent_channels: int diff --git a/invokeai/backend/model_management/__init__.py b/invokeai/backend/model_management/__init__.py index 07b567ce7a..44b51e6a2a 100644 --- a/invokeai/backend/model_management/__init__.py +++ b/invokeai/backend/model_management/__init__.py @@ -2,4 +2,4 @@ Initialization file for invokeai.backend.model_management """ from .model_manager import ModelManager -from .model_cache import ModelCache, ModelStatus +from .model_cache import ModelCache, ModelStatus, SDModelType diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py index 95b4e165f6..265d363475 100644 --- a/invokeai/backend/model_management/model_cache.py +++ b/invokeai/backend/model_management/model_cache.py @@ -78,6 +78,10 @@ class UnscannableModelException(Exception): "Raised when picklescan is unable to scan a legacy model file" pass +class ModelLocker(object): + "Forward declaration" + pass + class ModelCache(object): def 
__init__( self, @@ -112,8 +116,6 @@ class ModelCache(object): self.loaded_models: set = set() # set of model keys loaded in GPU self.locked_models: Counter = Counter() # set of model keys locked in GPU - - @contextlib.contextmanager def get_model( self, repo_id_or_path: Union[str,Path], @@ -124,7 +126,7 @@ class ModelCache(object): legacy_info: LegacyInfo=None, attach_model_part: Tuple[SDModelType, str] = (None,None), gpu_load: bool=True, - )->Generator[ModelClass, None, None]: + )->ModelLocker: # ?? what does it return ''' Load and return a HuggingFace model wrapped in a context manager generator, with RAM caching. Use like this: @@ -188,29 +190,45 @@ class ModelCache(object): if submodel: model = getattr(model, submodel.name) - if gpu_load and hasattr(model,'to'): - try: - self.loaded_models.add(key) - self.locked_models[key] += 1 - if self.lazy_offloading: - self._offload_unlocked_models() - self.logger.debug(f'Loading {key} into {self.execution_device}') - model.to(self.execution_device) # move into GPU - self._print_cuda_stats() - yield model - finally: - self.locked_models[key] -= 1 - if not self.lazy_offloading: - self._offload_unlocked_models() - self._print_cuda_stats() - else: - # in the event that the caller wants the model in RAM, we - # move it into CPU if it is in GPU and not locked - if hasattr(model,'to') and (key in self.loaded_models - and self.locked_models[key] == 0): - model.to(self.storage_device) - self.loaded_models.remove(key) - yield model + return self.ModelLocker(self, key, model, gpu_load) + + class ModelLocker(object): + def __init__(self, cache, key, model, gpu_load): + self.gpu_load = gpu_load + self.cache = cache + self.key = key + # This will keep a copy of the model in RAM until the locker + # is garbage collected. Needs testing! 
+ self.model = model + + def __enter__(self)->ModelClass: + cache = self.cache + key = self.key + model = self.model + if self.gpu_load and hasattr(model,'to'): + cache.loaded_models.add(key) + cache.locked_models[key] += 1 + if cache.lazy_offloading: + cache._offload_unlocked_models() + cache.logger.debug(f'Loading {key} into {cache.execution_device}') + model.to(cache.execution_device) # move into GPU + cache._print_cuda_stats() + else: + # in the event that the caller wants the model in RAM, we + # move it into CPU if it is in GPU and not locked + if hasattr(model,'to') and (key in cache.loaded_models + and cache.locked_models[key] == 0): + model.to(cache.storage_device) + cache.loaded_models.remove(key) + return model + + def __exit__(self, type, value, traceback): + key = self.key + cache = self.cache + cache.locked_models[key] -= 1 + if not cache.lazy_offloading: + cache._offload_unlocked_models() + cache._print_cuda_stats() def attach_part(self, diffusers_model: StableDiffusionPipeline, @@ -381,10 +399,11 @@ class ModelCache(object): revisions = [revision] if revision \ else ['fp16','main'] if self.precision==torch.float16 \ else ['main'] - extra_args = {'precision': self.precision} \ - if model_class in DiffusionClasses \ - else {} - + extra_args = {'torch_dtype': self.precision, + 'safety_checker': None}\ + if model_class in DiffusionClasses\ + else {} + # silence transformer and diffuser warnings with SilenceWarnings(): for rev in revisions: diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py index 3977ac0ed7..8b6704eb8a 100644 --- a/invokeai/backend/model_management/model_manager.py +++ b/invokeai/backend/model_management/model_manager.py @@ -69,7 +69,7 @@ class SDModelInfo(): revision: str = None _cache: ModelCache = None - + @property def status(self)->ModelStatus: '''Return load status of this model as a model_cache.ModelStatus enum''' if not self._cache: @@ -106,7 +106,7 @@ class ModelManager(object): config_path: Path, device_type: torch.device = CUDA_DEVICE, precision: torch.dtype = torch.float16, - max_models=DEFAULT_MAX_MODELS, + max_loaded_models=DEFAULT_MAX_MODELS, sequential_offload=False, logger: types.ModuleType = logger, ): @@ -119,7 +119,7 @@ class ModelManager(object): self.config_path = config_path self.config = OmegaConf.load(self.config_path) self.cache = ModelCache( - max_models=max_models, + max_models=max_loaded_models, execution_device = device_type, precision = precision, sequential_offload = sequential_offload, @@ -164,7 +164,7 @@ class ModelManager(object): if mconfig.get('vae'): legacy.vae_file = global_resolve_path(mconfig.vae) elif format=='diffusers': - location = mconfig.repo_id + location = mconfig.get('repo_id') or mconfig.get('path') revision = mconfig.get('revision') else: raise InvalidModelError( diff --git a/invokeai/backend/prompting/conditioning.py b/invokeai/backend/prompting/conditioning.py index d9130ace04..7c6cc0eea2 100644 --- a/invokeai/backend/prompting/conditioning.py +++ b/invokeai/backend/prompting/conditioning.py @@ -7,6 +7,7 @@ get_uc_and_c_and_ec() get the conditioned and unconditioned latent, an """ import re +import torch from typing import Optional, Union from compel import Compel diff --git a/invokeai/backend/web/invoke_ai_web_server.py b/invokeai/backend/web/invoke_ai_web_server.py index 84478d5cb6..eec02cd9dc 100644 --- a/invokeai/backend/web/invoke_ai_web_server.py +++ b/invokeai/backend/web/invoke_ai_web_server.py @@ -78,7 +78,6 @@ class InvokeAIWebServer: 
mimetypes.add_type("application/javascript", ".js") mimetypes.add_type("text/css", ".css") # Socket IO - logger = True if args.web_verbose else False engineio_logger = True if args.web_verbose else False max_http_buffer_size = 10000000 @@ -1278,13 +1277,14 @@ class InvokeAIWebServer: eventlet.sleep(0) parsed_prompt, _ = get_prompt_structure(generation_parameters["prompt"]) - tokens = ( - None - if type(parsed_prompt) is Blend - else get_tokens_for_prompt_object( - self.generate.model.tokenizer, parsed_prompt + with self.generate.model_context as model: + tokens = ( + None + if type(parsed_prompt) is Blend + else get_tokens_for_prompt_object( + model.tokenizer, parsed_prompt + ) ) - ) attention_maps_image_base64_url = ( None if attention_maps_image is None diff --git a/invokeai/frontend/CLI/CLI.py b/invokeai/frontend/CLI/CLI.py index aa0c4bea5f..0c984080a6 100644 --- a/invokeai/frontend/CLI/CLI.py +++ b/invokeai/frontend/CLI/CLI.py @@ -109,9 +109,6 @@ def main(): else: embedding_path = None - # migrate legacy models - ModelManager.migrate_models() - # load the infile as a list of lines if opt.infile: try: @@ -197,7 +194,7 @@ def main_loop(gen, opt): # changing the history file midstream when the output directory is changed. completer = get_completer(opt, models=gen.model_manager.list_models()) set_default_output_dir(opt, completer) - if gen.model: + if gen.model_context: add_embedding_terms(gen, completer) output_cntr = completer.get_current_history_length() + 1 @@ -1080,7 +1077,8 @@ def add_embedding_terms(gen, completer): Called after setting the model, updates the autocompleter with any terms loaded by the embedding manager. """ - trigger_strings = gen.model.textual_inversion_manager.get_all_trigger_strings() + with gen.model_context as model: + trigger_strings = model.textual_inversion_manager.get_all_trigger_strings() completer.add_embedding_terms(trigger_strings) @@ -1222,6 +1220,7 @@ def report_model_error(opt: Namespace, e: Exception): logger.warning( "This can be caused by a missing or corrupted models file, and can sometimes be fixed by (re)installing the models." ) + traceback.print_exc() yes_to_all = os.environ.get("INVOKE_MODEL_RECONFIGURE") if yes_to_all: logger.warning(
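
The central behavioral change in this patch is that models are no longer handed out directly: ModelManager.get_model()/choose_model() now return an SDModelInfo whose .context field is a ModelLocker, and ModelCache.get_model() returns that locker instead of yielding the model from a @contextmanager generator. Callers therefore wrap every model use in a with-block; __enter__ moves the pipeline onto the execution device and locks it, __exit__ unlocks it so the cache may offload it back to the storage device. Below is a minimal call-site sketch of this pattern, assuming an installed InvokeAI with a models config at configs/models.yaml and a model named "stable-diffusion-1.5" (both are placeholders, not taken from this patch):

import torch
from invokeai.backend import ModelManager

# Build a manager against the (assumed) models config; the argument names
# follow the updated ModelManager.__init__ shown in this patch.
manager = ModelManager(
    "configs/models.yaml",              # assumed config path
    device_type=torch.device("cuda"),
    precision=torch.float16,
    max_loaded_models=2,
)

model_info = manager.get_model("stable-diffusion-1.5")  # assumed model name
print(model_info.name, model_info.hash, model_info.status)

# The pipeline is guaranteed to be on the execution device only inside this
# block; on exit the locker releases its lock and the cache is free to
# offload the model back to CPU (immediately, unless lazy_offloading is set).
with model_info.context as model:
    print(type(model).__name__, model.device)

Because the locker keeps a reference to the underlying model, the same SDModelInfo.context can be re-entered repeatedly (as Generate.prompt2image and the invocation nodes above do) without reloading from disk, at the cost of pinning the model in RAM for the locker's lifetime.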