forked from rafabs97/headpose_final
clean_pointing04.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file contains a function used to preprocess the Pointing'04 dataset (clean_pointing04) and the function to get the
ground truth pose from the filename of each image, as well as a main function allowing the basic stand-alone processing
of the dataset.
"""

import os
import re
import glob
import cv2
import shutil

from keras_ssd512 import ssd_512
from head_detector_utils import get_head_bboxes, get_cropped_pics
from dataset_utils import class_assign, split_dataset, find_norm_parameters, store_dataset_arrays


def pose_from_filename(img):
    '''
    Gets ground truth pose values for a picture in the Pointing'04 dataset from its filename.

    Arguments:
        img: Path of the picture for which we want to get the ground truth pose values (it must follow the filename
             format used in the Pointing'04 dataset).

    Returns:
        tilt: Ground truth value for the tilt angle.
        pan: Ground truth value for the pan angle.
    '''

    # Get filename from path, dropping the fixed-length prefix and the extension.
    raw = os.path.basename(img)[11:]
    raw = os.path.splitext(raw)[0]

    # Get absolute tilt and pan values from filename.
    tilt, pan = re.split('[-+]', raw[1:])

    # Get actual numerical values from the strings obtained before, plus the sign corresponding to each angle.
    tilt = int(raw[0] + tilt)
    pan = int(raw[len(raw) - len(pan) - 1] + pan)

    # Return tilt and pan values.
    return tilt, pan
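
# Illustration of the decoding above (hypothetical basename; it only assumes, as the [11:] slice does, that the
# first 11 characters are the person/series prefix and the rest is the signed tilt/pan pair):
#
#   pose_from_filename('some/dir/personne011+15-30.jpg')  ->  (15, -30)
#
# The leading '+' supplies the tilt sign and the '-' in front of 30 supplies the pan sign.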


def clean_pointing04(pointing04_dir, destination_dir, detector, confidence_threshold, out_size, grayscale=False,
                     interpolation=cv2.INTER_LINEAR, start_count=0, duplicate_until=0):
    '''
    Performs the basic processing of the Pointing'04 dataset, obtaining cropped pictures for each head detection in the
    original images, and getting the ground truth pose values for each detection from the filename of each image.

    Arguments:
        pointing04_dir: Directory containing the Pointing'04 dataset pictures in its root.
        destination_dir: Directory where cropped pictures and the .csv containing pose values for each crop will be
        stored.
        detector: Keras model used for detecting human heads in pictures.
        confidence_threshold: Value used to filter detections (detections must have a confidence value higher than this
        value in order to be considered valid).
        out_size: Length of each side of the final cropped pictures.
        grayscale: If True, cropped pictures are converted to grayscale before being stored.
        interpolation: OpenCV interpolation method used when resizing the cropped pictures.
        start_count: Initial value of the picture count; used to assign filenames to cropped pictures when the AFLW
        dataset is not processed first.
        duplicate_until: Target number of pictures per class; used in order to augment the size of the dataset by
        duplicating pictures in each class. Each pose in the dataset corresponds to a different class.

    Returns:
        count: Total number of cropped pictures obtained (starting from start_count + 1).
        t_ratio: Ratio between the number of true detections and the number of annotated heads in the dataset.
        f_ratio: Ratio between the number of false detections and the total number of detections.
    '''

    # Initialize count.
    count = start_count

    # If labels.csv exists in the destination dir, append pose values; if it doesn't exist, create it.
    if os.path.isfile(destination_dir + 'labels.csv'):
        file = open(destination_dir + 'labels.csv', 'a')
    else:
        file = open(destination_dir + 'labels.csv', 'w')
        file.write('file,tilt,pan\n')

    # Initialize counts of true detections, processed annotated heads, false detections and total detections.
    t_count = 0
    p_count = 0
    f_count = 0
    d_count = 0

    # Create empty lists for storing the pictures assigned to each class.
    pics_by_class = [[] for i in range(169)]

    # Set the expected shape of the cropped pictures.
    if grayscale:
        out_shape = (out_size, out_size)
    else:
        out_shape = (out_size, out_size, 3)
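
    # Note on the class layout (added comment; the numbers follow from the formula used in the loop below):
    # both angles are quantized in 15-degree steps from -90 to +90, giving up to 13 bins per angle and
    # 13 * 13 = 169 possible (tilt, pan) classes, hence the 169 lists above; not every bin is necessarily
    # populated by the dataset. Worked example of the class index 13 * ((tilt + 90) / 15) + ((pan + 90) / 15):
    # for tilt = 30, pan = -15 it gives 13 * 8 + 5 = 109, and the inverse used later
    # (tilt = (i // 13) * 15 - 90, pan = (i % 13) * 15 - 90) recovers (30, -15).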

    # For each person in the dataset:
    for i in range(1, 16):

        # Get the path for every picture of that person.
        num = '{0:02}'.format(i)
        path = pointing04_dir + "Person" + num
        images = glob.glob(path + "/*.jpg")

        # For each picture:
        for img in images:

            # Increase processed annotated heads count.
            p_count = p_count + 1

            # Get ground truth pose values from the filename.
            tilt, pan = pose_from_filename(img)

            # Load the picture.
            pic = cv2.imread(img)

            # Get detections for the loaded picture.
            bboxes = get_head_bboxes(pic, detector, confidence_threshold)

            # Increase detections count.
            d_count = d_count + len(bboxes)

            # Update detection counters.
            if len(bboxes) > 1:
                f_count = f_count + len(bboxes) - 1
                t_count = t_count + 1
            else:
                t_count = t_count + len(bboxes)

            # Get cropped pictures from the loaded picture.
            if grayscale:
                pic = cv2.cvtColor(pic, cv2.COLOR_BGR2GRAY)
            c_pics = get_cropped_pics(pic, bboxes, out_size, 0, cropping='small', interpolation=interpolation)

            # For each cropped picture:
            for c_pic in c_pics:

                # If the cropped picture has the expected shape:
                if c_pic.shape == out_shape:

                    # Store picture.
                    cv2.imwrite(destination_dir + "pic_" + str(count) + ".jpg", c_pic)
                    file.write("pic_" + str(count) + ".jpg," + str(tilt) + "," + str(pan) + "\n")

                    # Assign a class to the picture from its pose values.
                    p_class = int(13 * ((tilt + 90) / 15) + ((pan + 90) / 15))
                    pics_by_class[p_class].append(count)

                    # Mirror the picture; a horizontal flip reverses the sign of the pan angle while the tilt is
                    # unchanged. Using -pan directly (instead of negating pan in place) keeps the ground truth intact
                    # for any further crops of the same image.
                    c_pic = cv2.flip(c_pic, 1)

                    # Store mirrored picture.
                    cv2.imwrite(destination_dir + "pic_" + str(count + 1) + ".jpg", c_pic)
                    file.write("pic_" + str(count + 1) + ".jpg," + str(tilt) + "," + str(-pan) + "\n")

                    # Calculate the class for the mirrored picture.
                    p_class = int(13 * ((tilt + 90) / 15) + ((-pan + 90) / 15))
                    pics_by_class[p_class].append(count + 1)

                    # Increase cropped picture count.
                    count = count + 2
                    print("Count:", count)

    '''
    If duplicate_until has a value of -1, the pictures in the dataset are duplicated until the number of cropped
    pictures matches the number of pictures indicated by start_count; otherwise, the target number of pictures per
    class will match that value.
    '''
    if duplicate_until == -1:
        increase = start_count / (count - start_count)
    else:
        target_len = duplicate_until
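
    # Worked example of the duplication targets (hypothetical numbers, for clarity only): if start_count were 20000
    # (e.g. crops already produced from AFLW) and this run produced 4000 Pointing'04 crops, then with
    # duplicate_until == -1 we get increase = 20000 / 4000 = 5, so a class currently holding 40 crops is topped up
    # to a target of 200 by copying its existing files round-robin below.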

    # If there are cropped pictures for the Pointing'04 dataset:
    if (count - start_count) > 0:

        # For each class:
        for i in range(len(pics_by_class)):

            # If there are pictures in the current class:
            if pics_by_class[i]:

                '''
                If duplicate_until has a value of -1, the target number of pictures for the current class is equal
                to the number of pictures already in that class times the ratio between the value of start_count
                and the number of cropped pictures obtained up to this point.
                '''
                if duplicate_until == -1:
                    target_len = int(len(pics_by_class[i]) * increase)

                # If the number of pictures in the current class is below the target number of pictures:
                if len(pics_by_class[i]) < target_len:

                    # Get pose values from the class label.
                    tilt = int(i / 13) * 15 - 90
                    pan = int(i % 13) * 15 - 90

                    # Duplicate pictures in the class until the target number of pictures is reached.
                    for j in range(target_len - len(pics_by_class[i])):
                        shutil.copyfile(destination_dir + "pic_" + str(pics_by_class[i][int(j % len(pics_by_class[i]))]) + ".jpg",
                                        destination_dir + "pic_" + str(count) + ".jpg")
                        file.write("pic_" + str(count) + ".jpg," + str(tilt) + "," + str(pan) + "\n")

                        # Increase cropped picture count.
                        count = count + 1
                        print("Count:", count)

    # Close the labels file so all pose entries are flushed to disk.
    file.close()

    # Calculate t_ratio and f_ratio.
    t_ratio = t_count / p_count
    f_ratio = f_count / d_count
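
    # For reference (hypothetical numbers, not from the original script): if 2790 annotated heads were processed
    # (p_count), 2770 of them yielded at least one detection (t_count) and there were 25 spurious extra boxes
    # (f_count) out of 2795 total detections (d_count), then t_ratio ~= 0.993 and f_ratio ~= 0.009.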

    # Return number of cropped pictures obtained (starting from start_count + 1), t_ratio and f_ratio.
    return count, t_ratio, f_ratio


def main():
    '''
    This function acts as a testbench for the clean_pointing04 function, using it to perform the basic processing of
    the Pointing'04 dataset from the set of default values defined below.
    '''

    # Source path.
    pointing04_dir = 'original/HeadPoseImageDatabase/'

    # Destination path.
    destination_dir = 'clean/pointing04/'

    # Detector model path.
    head_detector_path = 'models/head-detector.h5'

    # Detection parameters.
    in_size = 512
    out_size = 64
    confidence_threshold = 0.75

    # Output parameters.
    grayscale_output = True
    downscaling_interpolation = cv2.INTER_LINEAR

    # Number of splits for class assignment.
    num_splits_tilt = 8
    num_splits_pan = 8

    # Ratios for the train/test and train/validation splits.
    test_ratio = 0.2
    validation_ratio = 0.2

    # Detector model.
    detector = ssd_512(image_size=(in_size, in_size, 3), n_classes=1, min_scale=0.1, max_scale=1, mode='inference')
    detector.load_weights(head_detector_path)
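
    # Note (added comment): the SSD detector above is built for 512x512, 3-channel inputs (in_size) with a single
    # object class, since it only has to separate heads from background; the weights file is assumed to contain
    # matching pre-trained head-detection weights.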

    # Create the output directory, wiping it first if it already exists.
    try:
        os.mkdir(destination_dir)
        print("Directory", destination_dir, "created.")
    except FileExistsError:
        print("Directory", destination_dir, "already exists.")
        shutil.rmtree(destination_dir)
        os.mkdir(destination_dir)

    # Actual cleaning.
    clean_pointing04(pointing04_dir, destination_dir, detector, confidence_threshold, out_size, grayscale_output,
                     downscaling_interpolation)

    # Assign classes.
    class_assign(destination_dir, num_splits_tilt, num_splits_pan)

    # Split the dataset.
    split_dataset(destination_dir, test_ratio, validation_ratio)

    # Get normalization parameters.
    find_norm_parameters(destination_dir)

    # OPTIONAL: Save the dataset as numpy arrays (for uploading to Google Colab).
    store_dataset_arrays(destination_dir)


if __name__ == "__main__":
    main()
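
# How to run (assumptions noted): with the Pointing'04 images under 'original/HeadPoseImageDatabase/Person01' ...
# 'Person15' and the detector weights at 'models/head-detector.h5' (the defaults above), running
# `python3 clean_pointing04.py` should fill 'clean/pointing04/' with the pic_<n>.jpg crops and labels.csv, plus
# whatever class/split/normalization files the dataset_utils helpers produce.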