This repository has been archived by the owner on Jun 15, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 93
/
hand_detector.py
139 lines (110 loc) · 5.25 KB
/
hand_detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import cv2
import argparse
import numpy as np
from scipy.ndimage.filters import gaussian_filter
import chainer
from chainer import cuda, serializers, functions as F
from entity import params
from models.HandNet import HandNet
# NOTE(review): per the Chainer documentation `chainer.using_config` is a
# context manager; called as a bare statement (without `with`) it presumably
# leaves `enable_backprop` unchanged -- TODO confirm and, if so, enter it with
# a `with` statement around the forward pass instead.
chainer.using_config('enable_backprop', False)
class HandDetector(object):
    """Hand keypoint detector built around a HandNet model.

    Calling an instance with a hand image runs the network, resizes the
    last-stage heatmaps to the input image size and returns one peak per
    heatmap channel (the final channel is skipped).
    """

    def __init__(self, arch=None, weights_file=None, model=None, device=-1):
        """Load (or adopt) the network and move it to the requested device.

        Args:
            arch: key into ``params['archs']`` selecting the model class.
            weights_file: path of the ``.npz`` weights loaded into the model.
            model: optional ready-to-use model; when given it is used as-is
                and ``arch`` / ``weights_file`` are not consulted.
            device: GPU id; a negative value keeps everything on the CPU.
        """
        if model is not None:
            self.model = model
        else:
            print('Loading HandNet...')
            self.model = params['archs'][arch]()
            serializers.load_npz(weights_file, self.model)

        self.device = device
        if self.device >= 0:
            cuda.get_device_from_id(device).use()
            self.model.to_gpu()

            # create gaussian filter
            # Only needed (and only possible) on the GPU path: the CPU path of
            # compute_peaks_from_heatmaps smooths with scipy instead.
            ksize = params['ksize']
            kernel = cuda.to_gpu(self.create_gaussian_kernel(sigma=params['gaussian_sigma'], ksize=ksize))
            self.gaussian_kernel = kernel

    def __call__(self, hand_img, fast_mode=False, hand_type="right"):
        """Detect hand keypoints in ``hand_img``.

        Args:
            hand_img: image array of shape (height, width, channels).
            fast_mode: accepted for interface compatibility; currently unused.
            hand_type: ``"left"`` mirrors the image before inference and
                mirrors the heatmaps back afterwards; any other value runs
                the image unchanged.

        Returns:
            A list with one entry per keypoint heatmap: ``[x, y, conf]`` in
            ``hand_img`` pixel coordinates, or ``None`` when the peak does not
            exceed ``params['hand_heatmap_peak_thresh']``.
        """
        is_left = hand_type == "left"
        if is_left:
            hand_img = cv2.flip(hand_img, 1)

        hand_img_h, hand_img_w, _ = hand_img.shape

        size = params["hand_inference_img_size"]
        resized_image = cv2.resize(hand_img, (size, size))
        # HWC -> NCHW, scaled to roughly [-0.5, 0.5).
        x_data = np.array(resized_image[np.newaxis], dtype=np.float32).transpose(0, 3, 1, 2) / 256 - 0.5

        if self.device >= 0:
            x_data = cuda.to_gpu(x_data)

        # Inference only: do not retain the computational graph.
        with chainer.using_config('enable_backprop', False):
            hs = self.model(x_data)
            heatmaps = F.resize_images(hs[-1], (hand_img_h, hand_img_w)).data[0]

        if self.device >= 0:
            # Bring the result to host memory; cv2 below needs numpy arrays.
            heatmaps = heatmaps.get()

        if is_left:
            # Undo the horizontal mirroring so coordinates match the input.
            heatmaps = cv2.flip(heatmaps.transpose(1, 2, 0), 1).transpose(2, 0, 1)

        keypoints = self.compute_peaks_from_heatmaps(heatmaps)

        return keypoints

    # compute gaussian filter
    def create_gaussian_kernel(self, sigma=1, ksize=5):
        """Return a 2-D Gaussian kernel as a float32 array of shape (1, 1, ksize, ksize).

        Args:
            sigma: standard deviation of the Gaussian.
            ksize: side length of the square kernel.
        """
        center = ksize // 2
        offsets = np.arange(ksize) - center
        # Squared distance of every cell from the kernel centre.
        dist_sq = offsets[:, None] ** 2 + offsets[None, :] ** 2
        kernel = 1 / (sigma ** 2 * 2 * np.pi) * np.exp(-dist_sq / (2 * sigma ** 2))
        return kernel.astype(np.float32).reshape(1, 1, ksize, ksize)

    @staticmethod
    def _find_peak(heatmap, thresh):
        """Return ``[x, y, conf]`` of the global maximum, or None if it is not above ``thresh``."""
        max_value = heatmap.max()
        if max_value > thresh:
            # argmax yields exactly one location even when several pixels tie
            # for the maximum, so x and y always belong to the same pixel.
            y, x = np.unravel_index(int(heatmap.argmax()), heatmap.shape)
            return [int(x), int(y), max_value]  # x, y, conf
        return None

    def compute_peaks_from_heatmaps(self, heatmaps):
        """Smooth each heatmap and extract its strongest peak.

        Args:
            heatmaps: array of shape (channels, height, width); the last
                channel is skipped.

        Returns:
            A list of ``channels - 1`` entries, each ``[x, y, conf]`` or
            ``None`` when the smoothed maximum does not exceed
            ``params['hand_heatmap_peak_thresh']``.
        """
        xp = cuda.get_array_module(heatmaps)

        if xp is np:
            smoothed = [
                gaussian_filter(heatmaps[i], sigma=params['gaussian_sigma'])
                for i in range(heatmaps.shape[0] - 1)
            ]
        else:
            # Treat every channel as its own single-channel sample so that one
            # convolution call smooths them all; index the singleton channel
            # axis explicitly so genuine size-1 axes are never dropped.
            blurred = F.convolution_2d(
                heatmaps[:, None], self.gaussian_kernel,
                stride=1, pad=int(params['ksize'] / 2)).data[:, 0].get()
            smoothed = blurred[:-1]

        thresh = params['hand_heatmap_peak_thresh']
        return [self._find_peak(heatmap, thresh) for heatmap in smoothed]
def draw_hand_keypoints(orig_img, hand_keypoints, left_top):
    """Draw detected hand keypoints and finger segments on a copy of ``orig_img``.

    Args:
        orig_img: image to annotate; it is copied, not modified.
        hand_keypoints: sequence indexed by keypoint id whose entries are
            ``[x, y, conf]`` or a falsy value (e.g. ``None``) when missing.
        left_top: ``(left, top)`` offset added to every keypoint coordinate.

    Returns:
        The annotated copy of ``orig_img``.
    """
    canvas = orig_img.copy()
    offset_x, offset_y = left_top

    # One colour per finger, in the order of params["fingers_indices"].
    palette = [
        (0, 0, 255),
        (0, 255, 255),
        (0, 255, 0),
        (255, 0, 0),
        (255, 0, 255),
    ]

    for finger_no, segments in enumerate(params["fingers_indices"]):
        for segment in segments:
            start_kp = hand_keypoints[segment[0]]
            end_kp = hand_keypoints[segment[1]]
            start_pt = None
            end_pt = None

            if start_kp:
                start_x, start_y, _ = start_kp
                start_pt = (start_x + offset_x, start_y + offset_y)
                cv2.circle(canvas, start_pt, 3, palette[finger_no], -1)

            if end_kp:
                end_x, end_y, _ = end_kp
                end_pt = (end_x + offset_x, end_y + offset_y)
                cv2.circle(canvas, end_pt, 3, palette[finger_no], -1)

            # Connect the joints only when both ends were detected.
            if start_pt is not None and end_pt is not None:
                cv2.line(canvas, start_pt, end_pt, palette[finger_no], 1)

    return canvas
if __name__ == '__main__':
    # Command-line entry point: detect hand keypoints in one image and save
    # the annotated result as result.png in the current directory.
    parser = argparse.ArgumentParser(description='Hand detector')
    # A positional argument is always required, so it takes no default.
    parser.add_argument('arch', choices=params['archs'].keys(), help='Model architecture')
    parser.add_argument('weights', help='weights file path')
    # Nothing can be done without an input image, so demand it up front.
    parser.add_argument('--img', required=True, help='image file path')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    hand_detector = HandDetector(args.arch, args.weights, device=args.gpu)

    # read image
    img = cv2.imread(args.img)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        parser.error('could not read image: {}'.format(args.img))

    # inference
    hand_keypoints = hand_detector(img, hand_type="right")

    # draw and save image
    img = draw_hand_keypoints(img, hand_keypoints, (0, 0))
    print('Saving result into result.png...')
    cv2.imwrite('result.png', img)