Mirror of https://github.com/invoke-ai/InvokeAI.git (synced 2026-04-23 03:00:31 -04:00).
Similar to the existing node, but without any resizing and with a revised model loading API that uses the model manager. All code related to the invocation now lives in the Invoke repo.
150 lines · 6.5 KiB · Python
from typing import Mapping
|
|
|
|
import mediapipe as mp
|
|
import numpy
|
|
|
|
mp_drawing = mp.solutions.drawing_utils
|
|
mp_drawing_styles = mp.solutions.drawing_styles
|
|
mp_face_detection = mp.solutions.face_detection # Only for counting faces.
|
|
mp_face_mesh = mp.solutions.face_mesh
|
|
mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION
|
|
mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS
|
|
mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS
|
|
|
|
DrawingSpec = mp.solutions.drawing_styles.DrawingSpec
|
|
PoseLandmark = mp.solutions.drawing_styles.PoseLandmark
|
|
|
|
min_face_size_pixels: int = 64
|
|
f_thick = 2
|
|
f_rad = 1
|
|
right_iris_draw = DrawingSpec(color=(10, 200, 250), thickness=f_thick, circle_radius=f_rad)
|
|
right_eye_draw = DrawingSpec(color=(10, 200, 180), thickness=f_thick, circle_radius=f_rad)
|
|
right_eyebrow_draw = DrawingSpec(color=(10, 220, 180), thickness=f_thick, circle_radius=f_rad)
|
|
left_iris_draw = DrawingSpec(color=(250, 200, 10), thickness=f_thick, circle_radius=f_rad)
|
|
left_eye_draw = DrawingSpec(color=(180, 200, 10), thickness=f_thick, circle_radius=f_rad)
|
|
left_eyebrow_draw = DrawingSpec(color=(180, 220, 10), thickness=f_thick, circle_radius=f_rad)
|
|
mouth_draw = DrawingSpec(color=(10, 180, 10), thickness=f_thick, circle_radius=f_rad)
|
|
head_draw = DrawingSpec(color=(10, 200, 10), thickness=f_thick, circle_radius=f_rad)
|
|
|
|
# mp_face_mesh.FACEMESH_CONTOURS has all the items we care about.
|
|
face_connection_spec = {}
|
|
for edge in mp_face_mesh.FACEMESH_FACE_OVAL:
|
|
face_connection_spec[edge] = head_draw
|
|
for edge in mp_face_mesh.FACEMESH_LEFT_EYE:
|
|
face_connection_spec[edge] = left_eye_draw
|
|
for edge in mp_face_mesh.FACEMESH_LEFT_EYEBROW:
|
|
face_connection_spec[edge] = left_eyebrow_draw
|
|
# for edge in mp_face_mesh.FACEMESH_LEFT_IRIS:
|
|
# face_connection_spec[edge] = left_iris_draw
|
|
for edge in mp_face_mesh.FACEMESH_RIGHT_EYE:
|
|
face_connection_spec[edge] = right_eye_draw
|
|
for edge in mp_face_mesh.FACEMESH_RIGHT_EYEBROW:
|
|
face_connection_spec[edge] = right_eyebrow_draw
|
|
# for edge in mp_face_mesh.FACEMESH_RIGHT_IRIS:
|
|
# face_connection_spec[edge] = right_iris_draw
|
|
for edge in mp_face_mesh.FACEMESH_LIPS:
|
|
face_connection_spec[edge] = mouth_draw
|
|
iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw}
|
|
|
|
|
|
def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2):
|
|
"""We have a custom function to draw the pupils because the mp.draw_landmarks method requires a parameter for all
|
|
landmarks. Until our PR is merged into mediapipe, we need this separate method."""
|
|
if len(image.shape) != 3:
|
|
raise ValueError("Input image must be H,W,C.")
|
|
image_rows, image_cols, image_channels = image.shape
|
|
if image_channels != 3: # BGR channels
|
|
raise ValueError("Input image must contain three channel bgr data.")
|
|
for idx, landmark in enumerate(landmark_list.landmark):
|
|
if (landmark.HasField("visibility") and landmark.visibility < 0.9) or (
|
|
landmark.HasField("presence") and landmark.presence < 0.5
|
|
):
|
|
continue
|
|
if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0:
|
|
continue
|
|
image_x = int(image_cols * landmark.x)
|
|
image_y = int(image_rows * landmark.y)
|
|
draw_color = None
|
|
if isinstance(drawing_spec, Mapping):
|
|
if drawing_spec.get(idx) is None:
|
|
continue
|
|
else:
|
|
draw_color = drawing_spec[idx].color
|
|
elif isinstance(drawing_spec, DrawingSpec):
|
|
draw_color = drawing_spec.color
|
|
image[image_y - halfwidth : image_y + halfwidth, image_x - halfwidth : image_x + halfwidth, :] = draw_color
|
|
|
|
|
|
def reverse_channels(image):
|
|
"""Given a numpy array in RGB form, convert to BGR. Will also convert from BGR to RGB."""
|
|
# im[:,:,::-1] is a neat hack to convert BGR to RGB by reversing the indexing order.
|
|
# im[:,:,::[2,1,0]] would also work but makes a copy of the data.
|
|
return image[:, :, ::-1]
|
|
|
|
|
|
def generate_annotation(img_rgb, max_faces: int, min_confidence: float):
|
|
"""
|
|
Find up to 'max_faces' inside the provided input image.
|
|
If min_face_size_pixels is provided and nonzero it will be used to filter faces that occupy less than this many
|
|
pixels in the image.
|
|
"""
|
|
with mp_face_mesh.FaceMesh(
|
|
static_image_mode=True,
|
|
max_num_faces=max_faces,
|
|
refine_landmarks=True,
|
|
min_detection_confidence=min_confidence,
|
|
) as facemesh:
|
|
img_height, img_width, img_channels = img_rgb.shape
|
|
assert img_channels == 3
|
|
|
|
results = facemesh.process(img_rgb).multi_face_landmarks
|
|
|
|
if results is None:
|
|
print("No faces detected in controlnet image for Mediapipe face annotator.")
|
|
return numpy.zeros_like(img_rgb)
|
|
|
|
# Filter faces that are too small
|
|
filtered_landmarks = []
|
|
for lm in results:
|
|
landmarks = lm.landmark
|
|
face_rect = [
|
|
landmarks[0].x,
|
|
landmarks[0].y,
|
|
landmarks[0].x,
|
|
landmarks[0].y,
|
|
] # Left, up, right, down.
|
|
for i in range(len(landmarks)):
|
|
face_rect[0] = min(face_rect[0], landmarks[i].x)
|
|
face_rect[1] = min(face_rect[1], landmarks[i].y)
|
|
face_rect[2] = max(face_rect[2], landmarks[i].x)
|
|
face_rect[3] = max(face_rect[3], landmarks[i].y)
|
|
if min_face_size_pixels > 0:
|
|
face_width = abs(face_rect[2] - face_rect[0])
|
|
face_height = abs(face_rect[3] - face_rect[1])
|
|
face_width_pixels = face_width * img_width
|
|
face_height_pixels = face_height * img_height
|
|
face_size = min(face_width_pixels, face_height_pixels)
|
|
if face_size >= min_face_size_pixels:
|
|
filtered_landmarks.append(lm)
|
|
else:
|
|
filtered_landmarks.append(lm)
|
|
|
|
# Annotations are drawn in BGR for some reason, but we don't need to flip a zero-filled image at the start.
|
|
empty = numpy.zeros_like(img_rgb)
|
|
|
|
# Draw detected faces:
|
|
for face_landmarks in filtered_landmarks:
|
|
mp_drawing.draw_landmarks(
|
|
empty,
|
|
face_landmarks,
|
|
connections=face_connection_spec.keys(),
|
|
landmark_drawing_spec=None,
|
|
connection_drawing_spec=face_connection_spec,
|
|
)
|
|
draw_pupils(empty, face_landmarks, iris_landmark_spec, 2)
|
|
|
|
# Flip BGR back to RGB.
|
|
empty = reverse_channels(empty).copy()
|
|
|
|
return empty
|