Object Oriented Annotators

This commit is contained in:
lvmin
2023-02-15 18:41:33 -08:00
parent f8a359543a
commit 7e04ea233e
15 changed files with 129 additions and 106 deletions

View File

@@ -1,5 +1,6 @@
import cv2
class CannyDetector:
    """Canny edge annotator.

    Object-oriented replacement for the former module-level ``apply_canny``
    function: an instance is callable with the same arguments and returns
    the same result, so callers can swap ``apply_canny`` for
    ``CannyDetector()`` unchanged.
    """

    def __call__(self, img, low_threshold, high_threshold):
        """Return the Canny edge map of ``img``.

        img: HWC uint8 image array (as accepted by ``cv2.Canny``).
        low_threshold / high_threshold: hysteresis thresholds passed
        straight through to ``cv2.Canny``.
        """
        return cv2.Canny(img, low_threshold, high_threshold)

View File

@@ -91,22 +91,22 @@ class Network(torch.nn.Module):
return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1))
# end
# end
class HEDdetector:
    """HED (holistically-nested edge detection) annotator.

    Wraps the former module-level ``netNetwork = Network().cuda().eval()``
    plus ``apply_hed`` function into one object, so the network is only
    built when a detector instance is created instead of at import time.
    """

    def __init__(self):
        # Build the HED network once per instance; eval mode, CUDA resident.
        self.netNetwork = Network().cuda().eval()

    def __call__(self, input_image):
        """Return a uint8 edge map for a 3-channel HWC uint8 image.

        NOTE(review): the channel order is reversed before inference —
        presumably an RGB<->BGR flip expected by the pretrained weights;
        confirm against the model's training pipeline.
        """
        assert input_image.ndim == 3
        input_image = input_image[:, :, ::-1].copy()
        with torch.no_grad():
            image_hed = torch.from_numpy(input_image).float().cuda()
            image_hed = image_hed / 255.0  # scale to [0, 1]
            image_hed = rearrange(image_hed, 'h w c -> 1 c h w')
            edge = self.netNetwork(image_hed)[0]
            # Back to uint8 pixel range; [0] drops the channel dimension.
            edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
            return edge[0]
def nms(x, t, s):

View File

@@ -5,32 +5,34 @@ import torch
from einops import rearrange
from .api import MiDaSInference
class MidasDetector:
    """MiDaS depth + normal-map annotator.

    Object-oriented replacement for the former module-level
    ``model = MiDaSInference(...)`` plus ``apply_midas`` function; the
    model is loaded when the detector is constructed, not at import time.
    """

    def __init__(self):
        self.model = MiDaSInference(model_type="dpt_hybrid").cuda()

    def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
        """Return ``(depth_image, normal_image)`` as uint8 arrays.

        input_image: 3-channel HWC uint8 image.
        a: constant z component used when assembling surface normals.
        bg_th: normalized-depth threshold below which gradients are
            zeroed (treated as background).
        """
        assert input_image.ndim == 3
        image_depth = input_image
        with torch.no_grad():
            image_depth = torch.from_numpy(image_depth).float().cuda()
            image_depth = image_depth / 127.5 - 1.0  # scale to [-1, 1]
            image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
            depth = self.model(image_depth)[0]

            # Min-max normalize depth to [0, 1] for the uint8 depth image.
            depth_pt = depth.clone()
            depth_pt -= torch.min(depth_pt)
            depth_pt /= torch.max(depth_pt)
            depth_pt = depth_pt.cpu().numpy()
            depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)

            # Build a normal map from raw depth gradients (Sobel), with a
            # constant z of ``a``; background pixels get zero x/y gradient.
            depth_np = depth.cpu().numpy()
            x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3)
            y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3)
            z = np.ones_like(x) * a
            x[depth_pt < bg_th] = 0
            y[depth_pt < bg_th] = 0
            normal = np.stack([x, y, z], axis=2)
            normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5
            normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)

            return depth_image, normal_image

View File

@@ -4,27 +4,28 @@ import torch
import os
from einops import rearrange
from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
from .utils import pred_lines
from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
from .utils import pred_lines
class MLSDdetector:
    """M-LSD straight-line annotator.

    Object-oriented replacement for the former module-level model loading
    plus ``apply_mlsd`` function; the checkpoint is loaded when the
    detector is constructed, not at import time.
    """

    def __init__(self):
        model_path = './annotator/ckpts/mlsd_large_512_fp32.pth'
        model = MobileV2_MLSD_Large()
        model.load_state_dict(torch.load(model_path), strict=True)
        self.model = model.cuda().eval()

    def __call__(self, input_image, thr_v, thr_d):
        """Return a single-channel uint8 image with detected lines drawn white.

        input_image: 3-channel HWC uint8 image.
        thr_v / thr_d: score and distance thresholds forwarded to
            ``pred_lines``.
        """
        assert input_image.ndim == 3
        img = input_image
        img_output = np.zeros_like(img)
        try:
            with torch.no_grad():
                lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d)
                for line in lines:
                    x_start, y_start, x_end, y_end = [int(val) for val in line]
                    cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
        except Exception:
            # Deliberate best-effort behavior: on any detection failure,
            # return the blank canvas instead of raising.
            pass
        return img_output[:, :, 0]

View File

@@ -7,23 +7,25 @@ from . import util
from .body import Body
from .hand import Hand
class OpenposeDetector:
    """OpenPose body (and optional hand) pose annotator.

    Object-oriented replacement for the former module-level ``Body``/``Hand``
    estimators plus ``apply_openpose`` function; the checkpoints are loaded
    when the detector is constructed, not at import time.
    """

    def __init__(self):
        self.body_estimation = Body('./annotator/ckpts/body_pose_model.pth')
        self.hand_estimation = Hand('./annotator/ckpts/hand_pose_model.pth')

    def __call__(self, oriImg, hand=False):
        """Return ``(canvas, pose_dict)``.

        canvas: image with the detected body (and optionally hand) pose
            drawn on a black background the same shape as ``oriImg``.
        pose_dict: ``dict(candidate=..., subset=...)`` as plain lists.
        hand: when True, also detect and draw hand keypoints.

        NOTE(review): the channel order of ``oriImg`` is reversed before
        estimation — presumably an RGB<->BGR flip; confirm with callers.
        """
        oriImg = oriImg[:, :, ::-1].copy()
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            canvas = np.zeros_like(oriImg)
            canvas = util.draw_bodypose(canvas, candidate, subset)
            if hand:
                hands_list = util.handDetect(candidate, subset, oriImg)
                all_hand_peaks = []
                for x, y, w, is_left in hands_list:
                    peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
                    # Shift crop-local peak coordinates back into full-image
                    # coordinates; zero stays zero (missing keypoint marker).
                    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                    all_hand_peaks.append(peaks)
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())

View File

@@ -2,12 +2,13 @@ from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor,
from annotator.uniformer.mmseg.core.evaluation import get_palette
class UniformerDetector:
    """UniFormer semantic-segmentation annotator.

    Object-oriented replacement for the former module-level segmentor
    construction plus ``apply_uniformer`` function; the model is built
    when the detector is constructed, not at import time.
    """

    def __init__(self):
        checkpoint_file = "annotator/ckpts/upernet_global_small.pth"
        config_file = 'annotator/uniformer/exp/upernet_global_small/config.py'
        self.model = init_segmentor(config_file, checkpoint_file).cuda()

    def __call__(self, img):
        """Segment ``img`` and return it rendered with the ADE palette
        at full opacity."""
        result = inference_segmentor(self.model, img)
        res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
        return res_img

View File

def canny(img, res, l, h):
    """Resize ``img`` to ``res``, lazily build the Canny annotator, and
    return ``[edge_map]``.

    The detector is cached in the module-level ``model_canny`` global so
    it is constructed only on first use.
    """
    img = resize_image(HWC3(img), res)
    global model_canny
    if model_canny is None:
        from annotator.canny import CannyDetector
        model_canny = CannyDetector()
    result = model_canny(img, l, h)
    return [result]
def hed(img, res):
    """Resize ``img`` to ``res``, lazily build the HED annotator, and
    return ``[edge_map]``.

    The detector is cached in the module-level ``model_hed`` global so
    it is constructed only on first use.
    """
    img = resize_image(HWC3(img), res)
    global model_hed
    if model_hed is None:
        from annotator.hed import HEDdetector
        model_hed = HEDdetector()
    result = model_hed(img)
    return [result]
def mlsd(img, res, thr_v, thr_d):
    """Resize ``img`` to ``res``, lazily build the M-LSD annotator, and
    return ``[line_map]``.

    The detector is cached in the module-level ``model_mlsd`` global so
    it is constructed only on first use.
    """
    img = resize_image(HWC3(img), res)
    global model_mlsd
    if model_mlsd is None:
        from annotator.mlsd import MLSDdetector
        model_mlsd = MLSDdetector()
    result = model_mlsd(img, thr_v, thr_d)
    return [result]
def midas(img, res, a):
    """Resize ``img`` to ``res``, lazily build the MiDaS annotator, and
    return its results (depth image, normal image).

    The detector is cached in the module-level ``model_midas`` global so
    it is constructed only on first use.
    """
    img = resize_image(HWC3(img), res)
    global model_midas
    if model_midas is None:
        from annotator.midas import MidasDetector
        model_midas = MidasDetector()
    results = model_midas(img, a)
    return results
def openpose(img, res, has_hand):
    """Resize ``img`` to ``res``, lazily build the OpenPose annotator, and
    return ``[pose_canvas]`` (the pose dict is discarded here).

    The detector is cached in the module-level ``model_openpose`` global
    so it is constructed only on first use.
    """
    img = resize_image(HWC3(img), res)
    global model_openpose
    if model_openpose is None:
        from annotator.openpose import OpenposeDetector
        model_openpose = OpenposeDetector()
    result, _ = model_openpose(img, has_hand)
    return [result]
def uniformer(img, res):
    """Resize ``img`` to ``res``, lazily build the UniFormer segmentation
    annotator, and return ``[segmentation_render]``.

    The detector is cached in the module-level ``model_uniformer`` global
    so it is constructed only on first use.
    """
    img = resize_image(HWC3(img), res)
    global model_uniformer
    if model_uniformer is None:
        from annotator.uniformer import UniformerDetector
        model_uniformer = UniformerDetector()
    result = model_uniformer(img)
    return [result]

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.canny import apply_canny
from annotator.canny import CannyDetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_canny = CannyDetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_canny.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.midas import apply_midas
from annotator.midas import MidasDetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_midas = MidasDetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_depth.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.hed import apply_hed, nms
from annotator.hed import HEDdetector, nms
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_hed = HEDdetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_scribble.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.hed import apply_hed
from annotator.hed import HEDdetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_hed = HEDdetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_hed.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.mlsd import apply_mlsd
from annotator.mlsd import MLSDdetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_mlsd = MLSDdetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_mlsd.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.midas import apply_midas
from annotator.midas import MidasDetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_midas = MidasDetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_normal.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.openpose import apply_openpose
from annotator.openpose import OpenposeDetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_openpose = OpenposeDetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_openpose.pth', location='cuda'))
model = model.cuda()

View File

@@ -10,11 +10,13 @@ import random
from pytorch_lightning import seed_everything
from annotator.util import resize_image, HWC3
from annotator.uniformer import apply_uniformer
from annotator.uniformer import UniformerDetector
from cldm.model import create_model, load_state_dict
from ldm.models.diffusion.ddim import DDIMSampler
apply_uniformer = UniformerDetector()
model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_seg.pth', location='cuda'))
model = model.cuda()