Files
InvokeAI/invokeai/backend/image_util/dw_openpose/__init__.py
psychedelicious 8d32ede082 tidy(nodes): remove matplotlib dependency
It was only used for a single color conversion function. Replaced with cv2 code, tested functionality to confirm it works the same.
2025-04-04 18:42:13 +11:00

153 lines
5.4 KiB
Python

from pathlib import Path
from typing import Dict
import huggingface_hub
import numpy as np
import onnxruntime as ort
import torch
from PIL import Image
from invokeai.backend.image_util.dw_openpose.onnxdet import inference_detector
from invokeai.backend.image_util.dw_openpose.onnxpose import inference_pose
from invokeai.backend.image_util.dw_openpose.utils import NDArrayInt, draw_bodypose, draw_facepose, draw_handpose
from invokeai.backend.image_util.util import np_to_pil
from invokeai.backend.util.devices import TorchDevice
class DWOpenposeDetector:
"""
Code from the original implementation of the DW Openpose Detector.
Credits: https://github.com/IDEA-Research/DWPose
"""
hf_repo_id = "yzd-v/DWPose"
hf_filename_onnx_det = "yolox_l.onnx"
hf_filename_onnx_pose = "dw-ll_ucoco_384.onnx"
@classmethod
def get_model_url_det(cls) -> str:
"""Returns the URL for the detection model."""
return huggingface_hub.hf_hub_url(cls.hf_repo_id, cls.hf_filename_onnx_det)
@classmethod
def get_model_url_pose(cls) -> str:
"""Returns the URL for the pose model."""
return huggingface_hub.hf_hub_url(cls.hf_repo_id, cls.hf_filename_onnx_pose)
@staticmethod
def create_onnx_inference_session(model_path: Path) -> ort.InferenceSession:
"""Creates an ONNX Inference Session for the given model path, using the appropriate execution provider based on
the device type."""
device = TorchDevice.choose_torch_device()
providers = ["CUDAExecutionProvider"] if device.type == "cuda" else ["CPUExecutionProvider"]
return ort.InferenceSession(path_or_bytes=model_path, providers=providers)
def __init__(self, session_det: ort.InferenceSession, session_pose: ort.InferenceSession):
self.session_det = session_det
self.session_pose = session_pose
def pose_estimation(self, np_image: np.ndarray):
"""Does the pose estimation on the given image and returns the keypoints and scores."""
det_result = inference_detector(self.session_det, np_image)
keypoints, scores = inference_pose(self.session_pose, det_result, np_image)
keypoints_info = np.concatenate((keypoints, scores[..., None]), axis=-1)
# compute neck joint
neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
# neck score when visualizing pred
neck[:, 2:4] = np.logical_and(keypoints_info[:, 5, 2:4] > 0.3, keypoints_info[:, 6, 2:4] > 0.3).astype(int)
new_keypoints_info = np.insert(keypoints_info, 17, neck, axis=1)
mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
new_keypoints_info[:, openpose_idx] = new_keypoints_info[:, mmpose_idx]
keypoints_info = new_keypoints_info
keypoints, scores = keypoints_info[..., :2], keypoints_info[..., 2]
return keypoints, scores
def run(
self,
image: Image.Image,
draw_face: bool = False,
draw_body: bool = True,
draw_hands: bool = False,
) -> Image.Image:
"""Detects the pose in the given image and returns an solid black image with pose drawn on top, suitable for
use with a ControlNet."""
np_image = np.array(image)
H, W, C = np_image.shape
with torch.no_grad():
candidate, subset = self.pose_estimation(np_image)
nums, keys, locs = candidate.shape
candidate[..., 0] /= float(W)
candidate[..., 1] /= float(H)
body = candidate[:, :18].copy()
body = body.reshape(nums * 18, locs)
score = subset[:, :18]
for i in range(len(score)):
for j in range(len(score[i])):
if score[i][j] > 0.3:
score[i][j] = int(18 * i + j)
else:
score[i][j] = -1
un_visible = subset < 0.3
candidate[un_visible] = -1
# foot = candidate[:, 18:24]
faces = candidate[:, 24:92]
hands = candidate[:, 92:113]
hands = np.vstack([hands, candidate[:, 113:]])
bodies = {"candidate": body, "subset": score}
pose = {"bodies": bodies, "hands": hands, "faces": faces}
return DWOpenposeDetector.draw_pose(
pose, H, W, draw_face=draw_face, draw_hands=draw_hands, draw_body=draw_body
)
@staticmethod
def draw_pose(
pose: Dict[str, NDArrayInt | Dict[str, NDArrayInt]],
H: int,
W: int,
draw_face: bool = True,
draw_body: bool = True,
draw_hands: bool = True,
) -> Image.Image:
"""Draws the pose on a black image and returns it as a PIL Image."""
bodies = pose["bodies"]
faces = pose["faces"]
hands = pose["hands"]
assert isinstance(bodies, dict)
candidate = bodies["candidate"]
assert isinstance(bodies, dict)
subset = bodies["subset"]
canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
if draw_body:
canvas = draw_bodypose(canvas, candidate, subset)
if draw_hands:
assert isinstance(hands, np.ndarray)
canvas = draw_handpose(canvas, hands)
if draw_face:
assert isinstance(hands, np.ndarray)
canvas = draw_facepose(canvas, faces) # type: ignore
dwpose_image = np_to_pil(canvas)
return dwpose_image