# attack.py -- Feature Space Attack: optimize the style statistics of the encoded
# content through a pre-trained style transfer (auto-encoder) net to craft
# adversarial examples against a target classifier.
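# Example invocations (a sketch only; it assumes the decoder / classifier checkpoints
# referenced in settings.py are already in place -- exact paths depend on your setup):
#   python attack.py --dataset imagenet --decoder 1 --model imagenet_normal --bound 1.5
#   python attack.py --dataset cifar10 --decoder 2 --scale --model cifar10_adv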
from __future__ import print_function
import numpy as np
import sys
import os
import argparse
from PIL import Image
import tensorflow as tf
import settings
import dataprep
import modelprep
from style_transfer_net import StyleTransferNet_adv
from utils import get_scope_var, save_rgb_img
np.set_printoptions(threshold=sys.maxsize)
parser = argparse.ArgumentParser(
    description='Feature Space Attack using a pre-trained auto encoder')
parser.add_argument("--dataset", help="Dataset to attack (the auto encoder must have been trained on it)",
                    choices=["imagenet", "cifar10"], default="imagenet")
parser.add_argument("--decoder", help="Depth of the decoder to use. A deeper decoder (e.g. 3) injects more structural change, " +
                    "which makes the attack more harmful but less natural-looking.", type=int, choices=[1, 2, 3], default=1)
parser.add_argument(
    "--scale", help="Whether to scale the CIFAR10 images up to the ImageNet size", action="store_true")
parser.add_argument("--model", help="Model to attack.", default="imagenet_normal",
                    choices=["imagenet_normal", "imagenet_denoise", "cifar10_adv", "cifar10_nat", "cifar10_trades"])
parser.add_argument("--bound", help="Bound for the attack: the exponential of sigma described in the paper",
                    type=float, default=1.5)
args = parser.parse_args()
data_set = args.dataset
decoder = args.decoder
model_name = args.model
bound = args.bound
if data_set == "imagenet":
    decoder_list = {1: "imagenet_shallowest",
                    2: "imagenet_shallow",
                    3: "imagenet"}
    decoder_name = decoder_list[decoder]
elif data_set == "cifar10":
    # One can choose not to scale CIFAR10 up to the ImageNet size for better speed,
    # but for the best quality consider scaling the images up.
    # The decoder for the unscaled case is named cifar10_unscale.
    decoder_list = {1: "cifar10_shallowest",
                    2: "cifar10_shallow",
                    3: "cifar10"}
    if args.scale:
        decoder_name = decoder_list[decoder]
    else:
        decoder_name = "cifar10_unscale"
task_name = "attack"
# Put all the pre-defined constants into settings, then fetch them as global variables
settings.common_const_init(data_set, model_name, decoder_name, task_name)
logger = settings.logger
for k, v in settings.config.items():
    globals()[k] = v
dataprep.init_data("eval")
get_data = dataprep.get_data
# (height, width, color_channels)
TRAINING_IMAGE_SHAPE = settings.config["IMAGE_SHAPE"]
EPOCHS = 4
EPSILON = 1e-5
BATCH_SIZE = settings.config["BATCH_SIZE"]
if data_set == "cifar10":
LEARNING_RATE = 1e-2
LR_DECAY_RATE = 1e-4
DECAY_STEPS = 1.0
adv_weight = 500
ITER=2000
CLIP_NORM_VALUE = 10.0
else:
    if decoder_name.find("shallowest") >= 0:
        LEARNING_RATE = 5e-3
    else:
        LEARNING_RATE = 1e-2
    LR_DECAY_RATE = 1e-3
    DECAY_STEPS = 1.0
    adv_weight = 128
    ITER = 500
    CLIP_NORM_VALUE = 10.0
style_weight = 1
encoder_path = ENCODER_WEIGHTS_PATH
debug = True
if debug:
    from datetime import datetime
    start_time = datetime.now()
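# grad_attack() runs the per-batch optimization: starting from the natural style
# statistics (stn.meanS, stn.sigmaS) of the encoded content, it takes ITER gradient
# steps on the combined content + adversarial loss, projects the style parameters back
# into the bound after every step (stn.style_bound), and keeps, for each image in the
# batch, the iterate with the lowest accuracy (ties broken by lower content loss).
# Finally it re-assigns the best style parameters (stn.asgn) and returns the best images.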
def grad_attack():
    sess.run(stn.init_style, feed_dict=fdict)
    sess.run(global_step.initializer)
    rst_img, rst_loss, nat_acc, rst_acc, rst_mean, rst_sigma = sess.run(
        [adv_img, content_loss_y, nat_output.acc_y_auto, adv_output.acc_y_auto, stn.meanS, stn.sigmaS], feed_dict=fdict)
    print("Natural Acc:", nat_acc)
    for i in range(ITER):
        # Run an optimization step
        _ = sess.run([train_op], feed_dict=fdict)
        # Clip the style parameters back into the bound
        sess.run(stn.style_bound, feed_dict=fdict)
        # Monitor the progress
        _adv_img, acc, aloss, closs, _mean, _sigma = sess.run(
            [adv_img, adv_output.acc_y_auto, adv_loss, content_loss_y, stn.meanS, stn.sigmaS], feed_dict=fdict)
        for j in range(BATCH_SIZE):
            # Save the best samples
            if acc[j] < rst_acc[j] or (acc[j] == rst_acc[j] and closs[j] < rst_loss[j]):
                rst_img[j] = _adv_img[j]
                rst_acc[j] = acc[j]
                rst_loss[j] = closs[j]
                rst_mean[j] = _mean[j]
                rst_sigma[j] = _sigma[j]
        if i % 50 == 0:
            acc = np.mean(acc)
            print(i, acc, "advl", aloss, "contentl", closs)
    # Reload the best saved style parameters
    sess.run(stn.asgn, feed_dict={stn.meanS_ph: rst_mean, stn.sigmaS_ph: rst_sigma})
    return rst_img
# get the training image shape
HEIGHT, WIDTH, CHANNELS = TRAINING_IMAGE_SHAPE
INPUT_SHAPE = (None, HEIGHT, WIDTH, CHANNELS)
# Gradient Clip in case of numerical instability
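# The element-wise clip below bounds each gradient entry of the two style variables to
# +/- 1/sqrt(limit), where limit = 10/sqrt(DECODER_DIM[-1]).  For illustration only: if
# DECODER_DIM[-1] were 512, limit would be about 0.44 and entries would be clipped to
# roughly +/- 1.5.  (512 is an assumed example; the real dimension comes from settings.config.)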
def gradient(opt, vars, loss):
    gradients, variables = zip(*opt.compute_gradients(loss, vars))
    g_split = [tf.unstack(g, BATCH_SIZE, axis=0) for g in gradients]
    g1_list = []
    g2_list = []
    DIM = settings.config["DECODER_DIM"][-1]
    limit = 10 / np.sqrt(DIM)
    for g1, g2 in zip(g_split[0], g_split[1]):
        # (g1, g2), _ = tf.clip_by_global_norm([g1, g2], CLIP_NORM_VALUE)
        g1 = tf.clip_by_value(g1, -1 / np.sqrt(limit), 1 / np.sqrt(limit))
        g2 = tf.clip_by_value(g2, -1 / np.sqrt(limit), 1 / np.sqrt(limit))
        g1_list.append(g1)
        g2_list.append(g2)
    gradients = [tf.stack(g1_list, axis=0), tf.stack(g2_list, axis=0)]
    # gradients, _ = tf.clip_by_global_norm(gradients, 1.0)
    opt = opt.apply_gradients(zip(gradients, variables), global_step=global_step)
    return opt
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
with tf.Graph().as_default(), tf.Session(config=tf_config) as sess:
    content = tf.placeholder(tf.float32, shape=INPUT_SHAPE, name='content')
    label = tf.placeholder(tf.int64, shape=None, name="label")
    # create the style transfer net
    stn = StyleTransferNet_adv(encoder_path)
    # pass the content to the stn to get the decoded image and the adversarial image
    dec_img, adv_img = stn.transform(content, p=bound)
    img = content
    stn_vars = get_scope_var("transform")
    # get the target feature maps, which are the output of AdaIN
    target_features = stn.target_features
    # pass the generated adversarial image to the encoder and use its output to compute the loss
    enc_gen_adv, enc_gen_layers_adv = stn.encode(adv_img)
    modelprep.init_classifier()
    build_model = modelprep.build_model
    restore_model = modelprep.restore_model
    # Build the classifier outputs for the different inputs. Each call returns a class
    # that defines various properties derived from the logits.
    # To use your own model, obtain your own logits from `content` and pass them to the
    # class build_logits in utils.py.
    adv_output = build_model(adv_img, label, reuse=False)
    nat_output = build_model(img, label, reuse=True)
    dec_output = build_model(dec_img, label, reuse=True)
    # We are minimizing the loss, so take the negative of the CW loss.
    # CW top-5 loss is used for ImageNet and CW top-1 for CIFAR10.
    # Here target_loss denotes the CW loss; it is not a targeted-attack loss.
    adv_loss = -adv_output.target_loss_auto
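    # (Hedged sketch of the CW margin used here: roughly max_{j != y} Z_j - Z_y over the
    # logits Z, with a top-5 analogue for ImageNet; maximizing it pushes the true label
    # out of the top prediction(s).  See build_logits in utils.py for the exact form.)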
    # compute the content loss
    content_loss_y = tf.reduce_sum(
        tf.reduce_mean(tf.square(enc_gen_adv - target_features), axis=[1, 2]), axis=-1)
    content_loss = tf.reduce_sum(content_loss_y)
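    # Per image this is  sum_c mean_{h,w} (E(x_adv) - t)^2, where E is the encoder and
    # t are the AdaIN target features, so content_loss_y has shape [BATCH_SIZE] and
    # content_loss is its sum over the batch.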
    # compute the total loss
    loss = content_loss + tf.reduce_sum(adv_loss * BATCH_SIZE * adv_weight)
    decoder_vars = get_scope_var("decoder")
    # Training step
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.inverse_time_decay(LEARNING_RATE, global_step, DECAY_STEPS, LR_DECAY_RATE)
    train_op = gradient(tf.train.AdamOptimizer(learning_rate, beta1=0.5), vars=stn_vars, loss=loss)
    sess.run(tf.global_variables_initializer())
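    # Restore only the decoder variables from the auto-encoder checkpoint (Decoder_Model,
    # configured in settings); restore_model() then loads the weights of the classifier
    # under attack.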
    saver = tf.train.Saver(decoder_vars, max_to_keep=1)
    saver.restore(sess, Decoder_Model)
    restore_model(sess)
    ###### Start the attack ######
    step = 0
    if debug:
        elapsed_time = datetime.now() - start_time
        start_time = datetime.now()
        print('\nElapsed time for preprocessing before the attack actually starts: %s' % elapsed_time)
        print('Now begin the attack...\n')
    uid = 0
    report_batch = 50
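    # Main attack loop: 100 batches in total.  The statistic buffers below are reset at
    # the start of every report_batch window and dumped to saved_samples<k>.npy once the
    # window completes; adversarial / benign / diff images are also saved as JPEGs per batch.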
    for batch in range(1, 100 + 1):
        if batch % report_batch == 1:
            np_adv_image = []
            np_benign_image = []
            np_content_loss = []
            np_acc_attack = []
            np_acc_attack_5 = []
            np_acc = []
            np_acc_5 = []
            np_decode_acc = []
            np_decode_acc_5 = []
            np_label = []
        # run the attack on this batch
        x_batch, y_batch = get_data()
        fdict = {content: x_batch, label: y_batch}
        grad_attack()
        step += 1
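        # Write a visual record for this batch: for every image a JPEG stacking the
        # adversarial output, the original input, and their absolute difference vertically.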
        for i in range(BATCH_SIZE):
            gan_out = sess.run(adv_img, feed_dict=fdict)
            save_out = np.concatenate(
                (gan_out[i], x_batch[i], np.abs(gan_out[i] - x_batch[i])))
            sz = TRAINING_IMAGE_SHAPE[1]
            full_path = os.path.join(base_dir_model, "%d" % step, "%d.jpg" % i)
            os.makedirs(os.path.join(base_dir_model, "%d" % step), exist_ok=True)
            save_out = np.reshape(save_out, newshape=[sz * 3, sz, 3])
            save_rgb_img(save_out, path=full_path)
        if batch % 1 == 0:
            elapsed_time = datetime.now() - start_time
            _content_loss, _adv_acc, _adv_loss, _loss = sess.run(
                [content_loss, adv_output.accuracy, adv_loss, loss], feed_dict=fdict)
            _adv_img, _loss_y, _adv_acc_y, _adv_acc_y_5, _acc_y, _acc_y_5, _decode_acc_y, _decode_acc_y_5 = sess.run([
                adv_img, content_loss_y, adv_output.acc_y, adv_output.acc_y_5, nat_output.acc_y, nat_output.acc_y_5, dec_output.acc_y, dec_output.acc_y_5], feed_dict=fdict)
            np_adv_image.append(_adv_img)
            np_benign_image.append(x_batch)
            np_content_loss.append(_loss_y)
            np_acc_attack.append(_adv_acc_y)
            np_acc_attack_5.append(_adv_acc_y_5)
            np_acc_5.append(_acc_y_5)
            np_acc.append(_acc_y)
            np_label.append(y_batch)
            np_decode_acc.append(_decode_acc_y)
            np_decode_acc_5.append(_decode_acc_y_5)
            _adv_loss = np.sum(_adv_loss)
            diff = (_adv_img - x_batch) / 255
            l2_norm = np.sum(diff * diff)
            li_norm = np.mean(np.amax(np.abs(diff), axis=-1))
            l1_norm = np.mean(np.sum(np.abs(diff), axis=-1))
            print("l2_norm", l2_norm, "li_norm", li_norm, "l1_norm", l1_norm)
            print('step: %d, total loss: %.3f, elapsed time: %s' % (step, _loss, elapsed_time))
            print('content loss: %.3f' % (_content_loss))
            print('adv loss : %.3f, weighted adv loss: %.3f , adv acc %.3f' %
                  (_adv_loss, adv_weight * _adv_loss, _adv_acc))
            print("normal acc:", _acc_y)
            print("adv acc:", _adv_acc_y)
            print("normal acc top5:", _acc_y_5)
            print("adv acc top5:", _adv_acc_y_5)
        if batch % report_batch == 0:
            np_adv_image_arr = np.concatenate(np_adv_image)
            np_benign_image_arr = np.concatenate(np_benign_image)
            np_content_loss_arr = np.concatenate(np_content_loss)
            np_acc_attack_arr = np.concatenate(np_acc_attack)
            np_acc_attack_5_arr = np.concatenate(np_acc_attack_5)
            np_acc_arr = np.concatenate(np_acc)
            np_acc_5_arr = np.concatenate(np_acc_5)
            np_decode_acc_arr = np.concatenate(np_decode_acc)
            np_decode_acc_5_arr = np.concatenate(np_decode_acc_5)
            np_label_arr = np.concatenate(np_label)
            saved_dict = {"adv_image": np_adv_image_arr,
                          "benign_image": np_benign_image_arr,
                          "content_loss": np_content_loss_arr,
                          "acc_attack": np_acc_attack_arr,
                          "acc_attack_5": np_acc_attack_5_arr,
                          "acc": np_acc_arr,
                          "acc_5": np_acc_5_arr,
                          "decode_acc": np_decode_acc_arr,
                          "decode_acc_5": np_decode_acc_5_arr,
                          "label": np_label_arr}
            np.save(os.path.join(base_dir_model, "saved_samples%d.npy" %
                                 (batch // report_batch)), saved_dict)
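            # Note: np.save stores the dict as a 0-d object array; load it back with
            # np.load(path, allow_pickle=True).item()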
    ###### Done with the attack & (optionally) save the model ######
    # saver.save(sess, model_save_path)
    if debug:
        elapsed_time = datetime.now() - start_time
        print('Done attacking! Elapsed time: %s' % elapsed_time)
        # print('Model is saved to: %s' % model_save_path)