from pathlib import Path
from typing import Dict

import huggingface_hub
import numpy as np
import onnxruntime as ort
import torch
from PIL import Image

from invokeai.backend.image_util.dw_openpose.onnxdet import inference_detector
from invokeai.backend.image_util.dw_openpose.onnxpose import inference_pose
from invokeai.backend.image_util.dw_openpose.utils import NDArrayInt, draw_bodypose, draw_facepose, draw_handpose
from invokeai.backend.image_util.util import np_to_pil
from invokeai.backend.util.devices import TorchDevice


class DWOpenposeDetector:
    """
    Code from the original implementation of the DW Openpose Detector.
    Credits: https://github.com/IDEA-Research/DWPose
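
    Example usage (a sketch; det_model_path, pose_model_path and pil_image are placeholders
    for locally downloaded ONNX model paths and an input PIL image):

        session_det = DWOpenposeDetector.create_onnx_inference_session(det_model_path)
        session_pose = DWOpenposeDetector.create_onnx_inference_session(pose_model_path)
        detector = DWOpenposeDetector(session_det, session_pose)
        pose_image = detector.run(pil_image, draw_face=True, draw_hands=True)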
    """

    hf_repo_id = "yzd-v/DWPose"
    hf_filename_onnx_det = "yolox_l.onnx"
    hf_filename_onnx_pose = "dw-ll_ucoco_384.onnx"

    @classmethod
    def get_model_url_det(cls) -> str:
        """Returns the URL for the detection model."""
        return huggingface_hub.hf_hub_url(cls.hf_repo_id, cls.hf_filename_onnx_det)

    @classmethod
    def get_model_url_pose(cls) -> str:
        """Returns the URL for the pose model."""
        return huggingface_hub.hf_hub_url(cls.hf_repo_id, cls.hf_filename_onnx_pose)

    @staticmethod
    def create_onnx_inference_session(model_path: Path) -> ort.InferenceSession:
        """Creates an ONNX inference session for the given model path, using the appropriate execution provider
        based on the device type."""

        device = TorchDevice.choose_torch_device()
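        # Use the CUDA execution provider when torch selected a CUDA device; otherwise use the CPU provider.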
        providers = ["CUDAExecutionProvider"] if device.type == "cuda" else ["CPUExecutionProvider"]
        return ort.InferenceSession(path_or_bytes=model_path, providers=providers)

    def __init__(self, session_det: ort.InferenceSession, session_pose: ort.InferenceSession):
        self.session_det = session_det
        self.session_pose = session_pose

    def pose_estimation(self, np_image: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Runs pose estimation on the given image and returns the keypoints and scores."""

        det_result = inference_detector(self.session_det, np_image)
        keypoints, scores = inference_pose(self.session_pose, det_result, np_image)
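
        # Stack the (x, y) coordinates and the per-keypoint confidences into a single
        # (num_people, num_keypoints, 3) array; DWPose predicts 133 keypoints per person
        # in the COCO-WholeBody layout.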
        keypoints_info = np.concatenate((keypoints, scores[..., None]), axis=-1)
        # Synthesize a neck joint as the midpoint of the two shoulders (keypoints 5 and 6).
        neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
        # The neck is marked visible only when both shoulders score above 0.3.
        neck[:, 2:4] = np.logical_and(keypoints_info[:, 5, 2:4] > 0.3, keypoints_info[:, 6, 2:4] > 0.3).astype(int)
        new_keypoints_info = np.insert(keypoints_info, 17, neck, axis=1)
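        # Reorder from the MMPose/COCO keypoint order to the OpenPose BODY_18 order expected by
        # the drawing utilities (e.g. the synthesized neck at index 17 moves to OpenPose index 1).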
        mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
        openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
        new_keypoints_info[:, openpose_idx] = new_keypoints_info[:, mmpose_idx]
        keypoints_info = new_keypoints_info

        keypoints, scores = keypoints_info[..., :2], keypoints_info[..., 2]

        return keypoints, scores

    def run(
        self,
        image: Image.Image,
        draw_face: bool = False,
        draw_body: bool = True,
        draw_hands: bool = False,
    ) -> Image.Image:
        """Detects the pose in the given image and returns a solid black image with the pose drawn
        on top, suitable for use with a ControlNet."""

        np_image = np.array(image)
        H, W, C = np_image.shape

        with torch.no_grad():
            candidate, subset = self.pose_estimation(np_image)
            nums, keys, locs = candidate.shape
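            # Normalize keypoint coordinates to [0, 1] so drawing is independent of the input resolution.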
            candidate[..., 0] /= float(W)
            candidate[..., 1] /= float(H)
            body = candidate[:, :18].copy()
            body = body.reshape(nums * 18, locs)
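            # draw_bodypose expects `subset` to hold indices into the flattened `body` array:
            # confident keypoints (score > 0.3) map to their index, the rest are marked missing with -1.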
            score = subset[:, :18]
            for i in range(len(score)):
                for j in range(len(score[i])):
                    if score[i][j] > 0.3:
                        score[i][j] = int(18 * i + j)
                    else:
                        score[i][j] = -1

            un_visible = subset < 0.3
            candidate[un_visible] = -1
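
            # With the neck inserted, each pose has 134 keypoints in COCO-WholeBody order:
            # 0-17 body, 18-23 feet, 24-91 face, 92-112 left hand, 113-133 right hand.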
            # foot = candidate[:, 18:24]

            faces = candidate[:, 24:92]

            hands = candidate[:, 92:113]
            hands = np.vstack([hands, candidate[:, 113:]])

            bodies = {"candidate": body, "subset": score}
            pose = {"bodies": bodies, "hands": hands, "faces": faces}

            return DWOpenposeDetector.draw_pose(
                pose, H, W, draw_face=draw_face, draw_hands=draw_hands, draw_body=draw_body
            )

    @staticmethod
    def draw_pose(
        pose: Dict[str, NDArrayInt | Dict[str, NDArrayInt]],
        H: int,
        W: int,
        draw_face: bool = True,
        draw_body: bool = True,
        draw_hands: bool = True,
    ) -> Image.Image:
        """Draws the pose on a black image and returns it as a PIL Image."""

        bodies = pose["bodies"]
        faces = pose["faces"]
        hands = pose["hands"]

        assert isinstance(bodies, dict)
        candidate = bodies["candidate"]
        subset = bodies["subset"]
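
        # Draw onto an all-black canvas the same size as the input image, suitable for ControlNet conditioning.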
        canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)

        if draw_body:
            canvas = draw_bodypose(canvas, candidate, subset)

        if draw_hands:
            assert isinstance(hands, np.ndarray)
            canvas = draw_handpose(canvas, hands)

        if draw_face:
            assert isinstance(faces, np.ndarray)
            canvas = draw_facepose(canvas, faces)

        dwpose_image = np_to_pil(canvas)

        return dwpose_image