Simplify handling of CLIP ViT selection for FLUX IP-Adapter invocation.

2026-04-23 03:00:31 -04:00 · 2024-10-21 19:54:21 +00:00
parent 5546110127
commit 90a906e203
2 changed files with 6 additions and 10 deletions
--- a/invokeai/app/invocations/flux_ip_adapter.py
+++ b/invokeai/app/invocations/flux_ip_adapter.py
@@ -39,11 +39,8 @@ class FluxIPAdapterInvocation(BaseInvocation):
    ip_adapter_model: ModelIdentifierField = InputField(
        description="The IP-Adapter model.", title="IP-Adapter Model", ui_type=UIType.IPAdapterModel
    )
-    clip_vision_model: Literal["ViT-L"] = InputField(
-        description="CLIP Vision model to use. Only applied if the correct CLIP Vision model cannot be detected from "
-        + "the model config.",
-        default="ViT-L",
-    )
+    # Currently, the only known ViT model used by FLUX IP-Adapters is ViT-L.
+    clip_vision_model: Literal["ViT-L"] = InputField(description="CLIP Vision model to use.", default="ViT-L")
    weight: Union[float, List[float]] = InputField(
        default=1, description="The weight given to the IP-Adapter", title="Weight"
    )
@@ -70,11 +67,8 @@ class FluxIPAdapterInvocation(BaseInvocation):
        ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
        assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig))

-        if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig):
-            image_encoder_model_id = ip_adapter_info.image_encoder_model_id
-            image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
-        else:
-            image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
+        # Note: There is an IPAdapterInvokeAIConfig.image_encoder_model_id field, but it isn't trustworthy.
+        image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]

        image_encoder_model = IPAdapterInvocation.get_clip_image_encoder(
            context, image_encoder_model_id, image_encoder_model_name
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -394,6 +394,8 @@ class IPAdapterBaseConfig(ModelConfigBase):
 class IPAdapterInvokeAIConfig(IPAdapterBaseConfig):
    """Model config for IP Adapter diffusers format models."""

+    # TODO(ryand): Should we deprecate this field? From what I can tell, it hasn't been probed correctly for a long
+    # time. Need to go through the history to make sure I'm understanding this fully.
    image_encoder_model_id: str
    format: Literal[ModelFormat.InvokeAI]