Simplify handling of CLIP ViT selection for FLUX IP-Adapter invocation.

This commit is contained in:
Ryan Dick
2024-10-21 19:54:21 +00:00
parent 5546110127
commit 90a906e203
2 changed files with 6 additions and 10 deletions

View File

@@ -39,11 +39,8 @@ class FluxIPAdapterInvocation(BaseInvocation):
ip_adapter_model: ModelIdentifierField = InputField(
description="The IP-Adapter model.", title="IP-Adapter Model", ui_type=UIType.IPAdapterModel
)
clip_vision_model: Literal["ViT-L"] = InputField(
description="CLIP Vision model to use. Only applied if the correct CLIP Vision model cannot be detected from "
+ "the model config.",
default="ViT-L",
)
# Currently, the only known ViT model used by FLUX IP-Adapters is ViT-L.
clip_vision_model: Literal["ViT-L"] = InputField(description="CLIP Vision model to use.", default="ViT-L")
weight: Union[float, List[float]] = InputField(
default=1, description="The weight given to the IP-Adapter", title="Weight"
)
@@ -70,11 +67,8 @@ class FluxIPAdapterInvocation(BaseInvocation):
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig))
if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig):
image_encoder_model_id = ip_adapter_info.image_encoder_model_id
image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
else:
image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
# Note: There is a IPAdapterInvokeAIConfig.image_encoder_model_id field, but it isn't trustworthy.
image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
image_encoder_model = IPAdapterInvocation.get_clip_image_encoder(
context, image_encoder_model_id, image_encoder_model_name

View File

@@ -394,6 +394,8 @@ class IPAdapterBaseConfig(ModelConfigBase):
class IPAdapterInvokeAIConfig(IPAdapterBaseConfig):
"""Model config for IP Adapter diffusers format models."""
# TODO(ryand): Should we deprecate this field? From what I can tell, it hasn't been probed correctly for a long
# time. Need to go through the history to make sure I'm understanding this fully.
image_encoder_model_id: str
format: Literal[ModelFormat.InvokeAI]