import os
import random

import cv2
import numpy as np
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.callbacks import ModelCheckpoint
from keras.layers import Input, Flatten, Dense, Dropout, Lambda
from keras.models import Model
from keras.optimizers import SGD, RMSprop
from keras.preprocessing.image import img_to_array


def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return shape1[0], 1


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
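
# In symbols, with d = euclidean_distance(a, b) and y = 1 for a positive
# (same-person) pair, y = 0 for a negative pair, the loss above is
#
#     L = mean(y * d^2 + (1 - y) * max(margin - d, 0)^2)
#
# so positive pairs are pulled together, while negative pairs are pushed
# apart until they are at least `margin` away from each other.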


class SiameseFaceNet(object):
    model_name = 'siamese-face-net'
    VERBOSE = 1

    def __init__(self):
        self.model = None
        self.vgg16_include_top = False
        self.labels = None
        self.config = None
        self.input_shape = None
        self.threshold = 0.5
        self.vgg16_model = None
    def img_to_encoding(self, image_path):
        print('encoding: ', image_path)
        if self.vgg16_model is None:
            self.vgg16_model = self.create_vgg16_model()
        image = cv2.imread(image_path, 1)
        img = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
        x = img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        return self.vgg16_model.predict(x)
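
    # Note: the shape of the returned encoding depends on vgg16_include_top.
    # With include_top=True, VGG16 returns the (1, 1000) ImageNet softmax
    # output; with include_top=False (the default here), it returns the
    # (1, 7, 7, 512) final convolutional feature map for a 224x224 input.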
    def load_model(self, model_dir_path):
        config_file_path = SiameseFaceNet.get_config_path(model_dir_path=model_dir_path)
        # allow_pickle is required on NumPy >= 1.16.3 to load a pickled dict
        self.config = np.load(config_file_path, allow_pickle=True).item()
        self.labels = self.config['labels']
        self.input_shape = self.config['input_shape']
        self.threshold = self.config['threshold']
        self.vgg16_include_top = self.config['vgg16_include_top']
        self.vgg16_model = self.create_vgg16_model()
        self.model = self.create_network(input_shape=self.input_shape)
        weight_file_path = SiameseFaceNet.get_weight_path(model_dir_path)
        self.model.load_weights(weight_file_path)
    def create_base_network(self, input_shape):
        '''Base network to be shared (eq. to feature extraction).
        '''
        inputs = Input(shape=input_shape)
        x = Flatten()(inputs)
        x = Dense(128, activation='relu')(x)
        x = Dropout(0.1)(x)
        x = Dense(128, activation='relu')(x)
        x = Dropout(0.1)(x)
        x = Dense(128, activation='relu')(x)
        return Model(inputs, x)
    def accuracy(self, y_true, y_pred):
        '''Compute classification accuracy with a fixed threshold on distances.
        '''
        return K.mean(K.equal(y_true, K.cast(y_pred < self.threshold, y_true.dtype)))
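
    # In other words, a pair is counted as correct when the thresholded
    # distance (1 if y_pred < threshold, else 0) agrees with its label:
    # e.g. a positive pair (y_true = 1) at distance 0.3 counts as correct
    # under the default threshold of 0.5, while one at distance 0.7 does not.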
    def create_network(self, input_shape):
        # network definition
        base_network = self.create_base_network(input_shape)

        input_a = Input(shape=input_shape)
        input_b = Input(shape=input_shape)

        # because we re-use the same instance `base_network`, the weights of
        # the network will be shared across the two branches
        processed_a = base_network(input_a)
        processed_b = base_network(input_b)

        distance = Lambda(euclidean_distance,
                          output_shape=eucl_dist_output_shape)([processed_a, processed_b])

        model = Model([input_a, input_b], distance)

        rms = RMSprop()
        model.compile(loss=contrastive_loss, optimizer=rms, metrics=[self.accuracy])
        model.summary()  # summary() prints directly; wrapping it in print() just prints None
        return model
    def create_pairs(self, database, names):
        '''Positive and negative pair creation.
        Alternates between positive and negative pairs.
        '''
        num_classes = len(database)
        pairs = []
        labels = []
        n = min([len(database[name]) for name in database.keys()])
        for d in range(len(names)):
            name = names[d]
            x = database[name]
            for i in range(n):
                # positive pair: two encodings of the same person
                pairs += [[x[i], x[(i + 1) % n]]]
                # negative pair: this person and a randomly chosen other person
                inc = random.randrange(1, num_classes)
                dn = (d + inc) % num_classes
                z1, z2 = x[i], database[names[dn]][i]
                pairs += [[z1, z2]]
                labels += [1, 0]
        return np.array(pairs), np.array(labels)
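
    # For example, with database = {'a': [a0, a1], 'b': [b0, b1]} one possible
    # outcome is pairs = [[a0, a1], [a0, b0], [a1, a0], [a1, b1]] with
    # labels = [1, 0, 1, 0]: positive and negative pairs alternate, so the
    # training set stays balanced between the two classes.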
    @staticmethod
    def get_config_path(model_dir_path):
        return os.path.join(model_dir_path, SiameseFaceNet.model_name + '-config.npy')

    @staticmethod
    def get_weight_path(model_dir_path):
        return os.path.join(model_dir_path, SiameseFaceNet.model_name + '-weights.h5')

    @staticmethod
    def get_architecture_path(model_dir_path):
        # the architecture is saved with model.to_json(), so use a .json extension
        return os.path.join(model_dir_path, SiameseFaceNet.model_name + '-architecture.json')
    def create_vgg16_model(self):
        vgg16_model = VGG16(include_top=self.vgg16_include_top, weights='imagenet')
        # the model is only used for inference via predict(), so the optimizer
        # and loss here are placeholders
        vgg16_model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy'])
        return vgg16_model
    def fit(self, database, model_dir_path, epochs=None, batch_size=None, threshold=None, vgg16_include_top=None):
        if threshold is not None:
            self.threshold = threshold
        if batch_size is None:
            batch_size = 128
        if epochs is None:
            epochs = 20
        if vgg16_include_top is not None:
            self.vgg16_include_top = vgg16_include_top

        # all encodings share the same shape, so take it from the first entry
        for name, feature in database.items():
            self.input_shape = feature[0].shape
            break

        self.vgg16_model = self.create_vgg16_model()
        self.model = self.create_network(input_shape=self.input_shape)

        architecture_file_path = self.get_architecture_path(model_dir_path)
        with open(architecture_file_path, 'w') as f:
            f.write(self.model.to_json())

        names = []
        self.labels = dict()
        for name in database.keys():
            names.append(name)
            self.labels[name] = len(self.labels)

        self.config = dict()
        self.config['input_shape'] = self.input_shape
        self.config['labels'] = self.labels
        self.config['threshold'] = self.threshold
        self.config['vgg16_include_top'] = self.vgg16_include_top

        config_file_path = SiameseFaceNet.get_config_path(model_dir_path=model_dir_path)
        np.save(config_file_path, self.config)

        weight_file_path = SiameseFaceNet.get_weight_path(model_dir_path)
        checkpoint = ModelCheckpoint(weight_file_path)

        t_x, t_y = self.create_pairs(database, names)
        print('data set pairs: ', t_x.shape)

        self.model.fit([t_x[:, 0], t_x[:, 1]], t_y,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_split=0.2,
                       verbose=SiameseFaceNet.VERBOSE,
                       callbacks=[checkpoint])
        self.model.save_weights(weight_file_path)
    def verify(self, image_path, identity, database, threshold=None):
        """
        Verify whether the person in the "image_path" image is "identity".

        Arguments:
        image_path -- path to an image
        identity -- string, name of the person whose identity you'd like to verify
        database -- python dictionary mapping names of allowed people (strings) to lists of their encodings (vectors)

        Returns:
        dist -- distance between the image at image_path and the images of "identity" in the database
        is_valid -- True if the image is verified as "identity", False otherwise
        """
        if threshold is not None:
            self.threshold = threshold

        # Step 1: compute the encoding for the image using img_to_encoding()
        encoding = self.img_to_encoding(image_path)

        # Step 2: compute the distance between this encoding and the stored
        # encodings of "identity", averaged over all of that person's images
        input_pairs = []
        x = database[identity]
        for i in range(len(x)):
            input_pairs.append([encoding, x[i]])
        input_pairs = np.array(input_pairs)
        dist = float(np.mean(self.model.predict([input_pairs[:, 0], input_pairs[:, 1]])))

        # Step 3: accept if dist < threshold, otherwise reject
        if dist < self.threshold:
            print("It's " + str(identity))
            is_valid = True
        else:
            print("It's not " + str(identity))
            is_valid = False
        return dist, is_valid
    def who_is_it(self, image_path, database, threshold=None):
        """
        Implements face recognition by finding who the person in the image_path image is.

        Arguments:
        image_path -- path to an image
        database -- python dictionary mapping each person's name to a list of their image encodings

        Returns:
        min_dist -- the minimum distance between the image_path encoding and the encodings in the database
        identity -- string, the name prediction for the person in the image_path image
        """
        if threshold is not None:
            self.threshold = threshold

        # Step 1: compute the target encoding for the image using img_to_encoding()
        encoding = self.img_to_encoding(image_path)

        # Step 2: find the closest encoding.
        # Initialize "min_dist" to a large value, say 100
        min_dist = 100
        identity = None

        # loop over the database dictionary's names and encodings
        for (name, x) in database.items():
            input_pairs = []
            for i in range(len(x)):
                input_pairs.append([encoding, x[i]])
            input_pairs = np.array(input_pairs)
            dist = float(np.mean(self.model.predict([input_pairs[:, 0], input_pairs[:, 1]])))
            print("--for " + str(name) + ", the distance is " + str(dist))

            # keep the closest match seen so far
            if dist < min_dist:
                min_dist = dist
                identity = name

        if min_dist > self.threshold:
            print("Not in the database.")
        else:
            print("It's " + str(identity) + ", the distance is " + str(min_dist))
        return min_dist, identity


def main():
    fnet = SiameseFaceNet()
    fnet.vgg16_include_top = True

    model_dir_path = './models'
    image_dir_path = "./data/images"

    database = dict()
    database["danielle"] = [fnet.img_to_encoding(image_dir_path + "/danielle.png")]
    database["younes"] = [fnet.img_to_encoding(image_dir_path + "/younes.jpg")]
    database["tian"] = [fnet.img_to_encoding(image_dir_path + "/tian.jpg")]
    database["andrew"] = [fnet.img_to_encoding(image_dir_path + "/andrew.jpg")]
    database["kian"] = [fnet.img_to_encoding(image_dir_path + "/kian.jpg")]
    database["dan"] = [fnet.img_to_encoding(image_dir_path + "/dan.jpg")]
    database["sebastiano"] = [fnet.img_to_encoding(image_dir_path + "/sebastiano.jpg")]
    database["bertrand"] = [fnet.img_to_encoding(image_dir_path + "/bertrand.jpg")]
    database["kevin"] = [fnet.img_to_encoding(image_dir_path + "/kevin.jpg")]
    database["felix"] = [fnet.img_to_encoding(image_dir_path + "/felix.jpg")]
    database["benoit"] = [fnet.img_to_encoding(image_dir_path + "/benoit.jpg")]
    database["arnaud"] = [fnet.img_to_encoding(image_dir_path + "/arnaud.jpg")]

    fnet.fit(database=database, model_dir_path=model_dir_path)
    fnet.load_model(model_dir_path)

    fnet.verify(image_dir_path + "/camera_0.jpg", "younes", database)
    fnet.verify(image_dir_path + "/camera_2.jpg", "kian", database)
    fnet.who_is_it(image_dir_path + "/camera_0.jpg", database)


if __name__ == '__main__':
    main()