# -*- coding: utf-8 -*-
"""
This file implements the data generator used during the training of the head pose estimator models in this project.
"""
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from math import ceil
import numpy as np


class HeadPoseDataGenerator(Sequence):
    '''
    This class implements a basic Keras data generator overriding methods from its parent class Sequence. The purpose
    of this data generator is to deliver in each batch a set of images, drawn from the subset used to initialize this
    generator, containing an equal number of members per class.
    '''

    def __init__(self, pose_dataframe, img_array, batch_size,
                 normalize=False, input_norm=None, tilt_norm=None, pan_norm=None,
                 augment=False, shift_range=None, zoom_range=None, brightness_range=None,
                 img_rescale=1, out_rescale=1):
        '''
        Initializes the data generator with the data from a given subset of the original dataset, and with the values
        to use when doing data augmentation.

        Arguments:
            pose_dataframe: Dataframe listing each picture in the given subset and its pose values.
            img_array: Numpy array containing the images from the given subset.
            batch_size: Number of pictures per batch.
            normalize: Whether the input data shall be normalized or not.
            input_norm: Tuple containing the mean and std values used to normalize the pictures in the dataset.
            tilt_norm: Tuple containing the mean and std values used to normalize the tilt values in the dataset.
            pan_norm: Tuple containing the mean and std values used to normalize the pan values in the dataset.
            augment: Whether data augmentation shall be applied or not.
            shift_range: Value (between 0 and 1) indicating the fraction of each picture's side length that can be
                used to shift the picture (in both axes).
            zoom_range: Tuple containing the minimum and maximum values used to apply zoom to each picture.
            brightness_range: Tuple containing the minimum and maximum values used to apply a brightness
                transformation to each picture.
            img_rescale: Each pixel of every picture in the subset will be multiplied by this value.
            out_rescale: Tilt and pan values of every picture in the subset will be multiplied by this value.
        '''
        # Create empty arrays for pictures and labels from the subset.
        self.pics = []
        self.labels = []

        # Initialize batch size.
        self.batch_size = batch_size

        # Initialize normalization parameters.
        self.normalize = normalize
        self.input_norm = input_norm

        '''
        Initialize the parameter controlling whether data augmentation shall be applied, together with the data
        augmentation parameters.
        '''
        self.augment = augment
        if self.augment:
            self.generator = ImageDataGenerator(width_shift_range=shift_range, height_shift_range=shift_range,
                                                brightness_range=brightness_range, zoom_range=zoom_range)

        # Initialize scaling parameters.
        self.img_rescale = img_rescale
        self.out_rescale = out_rescale

        '''
        Initialize the iterator used to control the position of the next picture from every class that will be
        included in a batch.
        '''
        self.common_iterator = 0

        # Sort dataframe by class.
        df = pose_dataframe.sort_values('class')

        # Initialize the number of pictures in the dataset.
        self.total_size = len(df.index)

        # Load images and pose values into the previously created arrays.
        prev_class = -1
        class_index = -1

        # For each image in the (ordered) dataset:
        for index, row in df.iterrows():

            '''
            If the class of the current picture is different from the last recorded class, append an empty list for
            the new class.
            '''
            if row['class'] != prev_class:
                prev_class = row['class']
                self.pics.append([])
                self.labels.append([])
                class_index += 1

            # Append the picture to the corresponding class array.
            self.pics[class_index].append(np.squeeze(img_array[index]))

            # Append the labels to the corresponding class array (rescaled and normalized).
            self.labels[class_index].append([((row['tilt'] * out_rescale) - tilt_norm[0]) / tilt_norm[1],
                                             ((row['pan'] * out_rescale) - pan_norm[0]) / pan_norm[1]])

        # Assert that the batch size is a multiple of the number of classes.
        assert batch_size % len(self.pics) == 0

    def __data_generation(self):
        '''
        Outputs a batch of pictures.

        Returns:
            X: Pictures in the batch.
            y: Labels for each picture in the batch.
        '''
        # Create empty lists for pictures and labels.
        X = []
        y = []

        # For each picture-per-class:
        for i in range(int(self.batch_size / len(self.pics))):

            # For each class:
            for j in range(len(self.pics)):

                # Select the next picture in the class list (start from the beginning after the last picture).
                pic = self.pics[j][int(self.common_iterator % len(self.pics[j]))]
                pic = np.expand_dims(pic, axis=2)

                # Apply data augmentation.
                if self.augment:
                    transformation = self.generator.get_random_transform(pic.shape)
                    # Force an isotropic zoom by copying the y-axis zoom factor to the x axis.
                    transformation['zx'] = transformation['zy']
                    pic = self.generator.apply_transform(pic, transformation)

                # Rescale each pixel value in the image.
                pic = pic * self.img_rescale

                # Normalize the image.
                if self.normalize:
                    pic = (pic - self.input_norm[0]) / self.input_norm[1]

                # Add the image and its labels to the batch.
                X.append(pic)
                y.append(self.labels[j][int(self.common_iterator % len(self.labels[j]))])

            # Update the iterator.
            self.common_iterator += 1

        # Transform the lists into Numpy arrays.
        X = np.array(X)
        y = np.array(y)

        # Return images and labels.
        return X, y

    def __len__(self):
        '''
        Outputs the length (number of batches) that the data generator can provide.

        Returns:
            l: The length of the data generator.
        '''
        '''
        Calculate the length of the data generator as the ratio between the total number of images and the size of
        each batch; in order to work properly with uneven class lengths, the result is rounded up to the smallest
        integer greater than or equal to it.
        '''
        l = ceil(self.total_size / self.batch_size)
        return l

    def __getitem__(self, index):
        '''
        Outputs a new batch given the batch index.

        Returns:
            X: Pictures in the batch.
            y: Labels for each picture in the batch.
        '''
        # Set the class iterator to the correct position for obtaining the requested batch.
        self.common_iterator = index * int(self.batch_size / len(self.pics))

        # Generate the batch.
        X, y = self.__data_generation()

        # Return images and labels for the requested batch.
        return X, y

    def reset(self):
        '''
        Resets the class iterator to its initial position.
        '''
        self.common_iterator = 0
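
# The snippet below is an illustrative usage sketch, not part of the original module: it shows one way this generator
# could be instantiated and inspected. The dataframe columns ('class', 'tilt', 'pan') match the ones read above, but
# the variable names, the synthetic data and the normalization tuples are assumptions made for this example only.
if __name__ == '__main__':
    import pandas as pd

    # Tiny synthetic subset: four 64x64 grayscale pictures spread over two classes.
    train_df = pd.DataFrame({'class': [0, 0, 1, 1],
                             'tilt': [10.0, -5.0, 20.0, 0.0],
                             'pan': [30.0, -10.0, 15.0, 5.0]})
    train_imgs = np.random.randint(0, 256, size=(4, 64, 64), dtype=np.uint8)

    generator = HeadPoseDataGenerator(train_df, train_imgs, batch_size=2,
                                      normalize=True, input_norm=(0.5, 0.25),
                                      tilt_norm=(0.0, 90.0), pan_norm=(0.0, 90.0),
                                      augment=False, img_rescale=1.0 / 255, out_rescale=1.0)

    # Each batch contains batch_size / number_of_classes pictures per class, so here one picture from each class.
    X, y = generator[0]
    print(len(generator), X.shape, y.shape)  # Expected: 2 (2, 64, 64, 1) (2, 2)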