Add FLUX IP-Adapter starter models.

Ryan Dick
2024-10-21 22:17:42 +00:00
parent 90a906e203
commit e8cd1bb3d8
3 changed files with 52 additions and 23 deletions

invokeai/app/invocations/flux_ip_adapter.py

@@ -68,8 +68,9 @@ class FluxIPAdapterInvocation(BaseInvocation):
         assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig))
         # Note: There is a IPAdapterInvokeAIConfig.image_encoder_model_id field, but it isn't trustworthy.
-        image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
+        image_encoder_starter_model = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
+        image_encoder_model_id = image_encoder_starter_model.source
+        image_encoder_model_name = image_encoder_starter_model.name
         image_encoder_model = IPAdapterInvocation.get_clip_image_encoder(
             context, image_encoder_model_id, image_encoder_model_name
         )
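The change above is mechanical: CLIP_VISION_MODEL_MAP now yields a StarterModel record instead of an (id, name) tuple, and both strings are read off that one object. A minimal stand-alone sketch of the pattern, using a stand-in dataclass rather than InvokeAI's real StarterModel class:

from dataclasses import dataclass

@dataclass(frozen=True)
class EncoderStarter:  # stand-in for StarterModel; only the fields the diff reads
    name: str
    source: str

CLIP_VISION = {
    "ViT-L": EncoderStarter(name="clip-vit-large-patch14", source="InvokeAI/clip-vit-large-patch14"),
}

# Old shape: the map stored ("source", "name") tuples that callers unpacked.
# New shape: callers read named fields, so the id/name pair cannot drift apart
# or be unpacked in the wrong order.
starter = CLIP_VISION["ViT-L"]
image_encoder_model_id = starter.source
image_encoder_model_name = starter.name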

invokeai/app/invocations/ip_adapter.py

@@ -18,6 +18,12 @@ from invokeai.backend.model_manager.config import (
     IPAdapterInvokeAIConfig,
     ModelType,
 )
+from invokeai.backend.model_manager.starter_models import (
+    StarterModel,
+    clip_vit_l_image_encoder,
+    ip_adapter_sd_image_encoder,
+    ip_adapter_sdxl_image_encoder,
+)

 class IPAdapterField(BaseModel):
@@ -56,10 +62,10 @@ class IPAdapterOutput(BaseInvocationOutput):
     ip_adapter: IPAdapterField = OutputField(description=FieldDescriptions.ip_adapter, title="IP-Adapter")

-CLIP_VISION_MODEL_MAP = {
-    "ViT-L": ("InvokeAI/clip-vit-large-patch14", "clip-vit-large-patch14"),
-    "ViT-H": ("InvokeAI/ip_adapter_sd_image_encoder", "ip_adapter_sd_image_encoder"),
-    "ViT-G": ("InvokeAI/ip_adapter_sdxl_image_encoder", "ip_adapter_sdxl_image_encoder"),
+CLIP_VISION_MODEL_MAP: dict[Literal["ViT-L", "ViT-H", "ViT-G"], StarterModel] = {
+    "ViT-L": clip_vit_l_image_encoder,
+    "ViT-H": ip_adapter_sd_image_encoder,
+    "ViT-G": ip_adapter_sdxl_image_encoder,
 }
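Typing the map with Literal keys means a type checker can reject unknown encoder names at the call site, not at runtime. A small self-contained sketch of that benefit (string values stand in for the StarterModel records to keep it short):

from typing import Literal

ClipVisionKey = Literal["ViT-L", "ViT-H", "ViT-G"]

ENCODER_SOURCES: dict[ClipVisionKey, str] = {
    "ViT-L": "InvokeAI/clip-vit-large-patch14",
    "ViT-H": "InvokeAI/ip_adapter_sd_image_encoder",
    "ViT-G": "InvokeAI/ip_adapter_sdxl_image_encoder",
}

def encoder_source(key: ClipVisionKey) -> str:
    return ENCODER_SOURCES[key]

print(encoder_source("ViT-L"))  # OK
# encoder_source("ViT-B")       # rejected by mypy/pyright: not a valid Literal value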
@@ -116,7 +122,9 @@ class IPAdapterInvocation(BaseInvocation):
             image_encoder_model_id = ip_adapter_info.image_encoder_model_id
             image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
         else:
-            image_encoder_model_id, image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
+            image_encoder_starter_model = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
+            image_encoder_model_id = image_encoder_starter_model.source
+            image_encoder_model_name = image_encoder_starter_model.name

         image_encoder_model = self.get_clip_image_encoder(context, image_encoder_model_id, image_encoder_model_name)

invokeai/backend/model_manager/starter_models.py

@@ -25,22 +25,6 @@ class StarterModelBundles(BaseModel):
     models: list[StarterModel]

-ip_adapter_sd_image_encoder = StarterModel(
-    name="IP Adapter SD1.5 Image Encoder",
-    base=BaseModelType.StableDiffusion1,
-    source="InvokeAI/ip_adapter_sd_image_encoder",
-    description="IP Adapter SD Image Encoder",
-    type=ModelType.CLIPVision,
-)
-
-ip_adapter_sdxl_image_encoder = StarterModel(
-    name="IP Adapter SDXL Image Encoder",
-    base=BaseModelType.StableDiffusionXL,
-    source="InvokeAI/ip_adapter_sdxl_image_encoder",
-    description="IP Adapter SDXL Image Encoder",
-    type=ModelType.CLIPVision,
-)
-
 cyberrealistic_negative = StarterModel(
     name="CyberRealistic Negative v3",
     base=BaseModelType.StableDiffusion1,
@@ -49,6 +33,32 @@ cyberrealistic_negative = StarterModel(
     type=ModelType.TextualInversion,
 )

+# region CLIP Image Encoders
+ip_adapter_sd_image_encoder = StarterModel(
+    name="IP Adapter SD1.5 Image Encoder",
+    base=BaseModelType.StableDiffusion1,
+    source="InvokeAI/ip_adapter_sd_image_encoder",
+    description="IP Adapter SD Image Encoder",
+    type=ModelType.CLIPVision,
+)
+
+ip_adapter_sdxl_image_encoder = StarterModel(
+    name="IP Adapter SDXL Image Encoder",
+    base=BaseModelType.StableDiffusionXL,
+    source="InvokeAI/ip_adapter_sdxl_image_encoder",
+    description="IP Adapter SDXL Image Encoder",
+    type=ModelType.CLIPVision,
+)
+
+# Note: This model is installed from the same source as the CLIPEmbed model below. The model contains both the image
+# encoder and the text encoder, but we need separate model entries so that they get loaded correctly.
+clip_vit_l_image_encoder = StarterModel(
+    name="clip-vit-large-patch14",
+    base=BaseModelType.Any,
+    source="InvokeAI/clip-vit-large-patch14",
+    description="CLIP ViT-L Image Encoder",
+    type=ModelType.CLIPVision,
+)
+# endregion
+
 # region TextEncoders
 t5_base_encoder = StarterModel(
     name="t5_base_encoder",
@@ -254,6 +264,14 @@ ip_adapter_sdxl = StarterModel(
     type=ModelType.IPAdapter,
     dependencies=[ip_adapter_sdxl_image_encoder],
 )
+
+ip_adapter_flux = StarterModel(
+    name="XLabs FLUX IP-Adapter",
+    base=BaseModelType.Flux,
+    source="https://huggingface.co/XLabs-AI/flux-ip-adapter/resolve/main/flux-ip-adapter.safetensors",
+    description="FLUX IP-Adapter",
+    type=ModelType.IPAdapter,
+    dependencies=[clip_vit_l_image_encoder],
+)
 # endregion

 # region ControlNet
 qr_code_cnet_sd1 = StarterModel(
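The dependencies field is what makes the new entry self-sufficient: installing the FLUX IP-Adapter should also pull in the CLIP ViT-L image encoder it needs at runtime. A minimal sketch of how a dependency-aware installer can flatten such records into an install order (illustrative only, not InvokeAI's actual installer API):

from dataclasses import dataclass

@dataclass(frozen=True)
class Starter:  # stand-in for StarterModel
    name: str
    source: str
    dependencies: tuple["Starter", ...] = ()

def install_order(model: Starter, seen: set[str] | None = None) -> list[Starter]:
    # Dependencies first, then the model itself, each at most once.
    seen = set() if seen is None else seen
    if model.name in seen:
        return []
    seen.add(model.name)
    order: list[Starter] = []
    for dep in model.dependencies:
        order.extend(install_order(dep, seen))
    order.append(model)
    return order

encoder = Starter("clip-vit-large-patch14", "InvokeAI/clip-vit-large-patch14")
flux_ip = Starter(
    "XLabs FLUX IP-Adapter",
    "https://huggingface.co/XLabs-AI/flux-ip-adapter/resolve/main/flux-ip-adapter.safetensors",
    dependencies=(encoder,),
)
assert [m.name for m in install_order(flux_ip)] == ["clip-vit-large-patch14", "XLabs FLUX IP-Adapter"]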
@@ -555,6 +573,7 @@ STARTER_MODELS: list[StarterModel] = [
     ip_adapter_plus_sd1,
     ip_adapter_plus_face_sd1,
     ip_adapter_sdxl,
+    ip_adapter_flux,
     qr_code_cnet_sd1,
     qr_code_cnet_sdxl,
     canny_sd1,
@@ -642,6 +661,7 @@ flux_bundle: list[StarterModel] = [
     t5_8b_quantized_encoder,
     clip_l_encoder,
     union_cnet_flux,
+    ip_adapter_flux,
 ]

 STARTER_BUNDLES: dict[str, list[StarterModel]] = {