1
+ import copy
2
+ import math
3
+ import numpy as np
4
+ import random
5
+
6
+ from tqdm import tqdm
7
+ from attacks .evasion_attacks import EvasionAttacker
8
+ from attacks .QAttack .utils import get_adj_list , from_adj_list , adj_list_oriented_to_non_oriented
9
+
10
class QAttacker(EvasionAttacker):
    """
    Evasion attacker that searches for edge rewirings with a genetic algorithm.

    An individual (gene) is a dict ``{node: {'add': a, 'del': d}}``: for every
    selected ``node`` the edge to ``a`` is added and the edge to ``d`` is removed.
    ``'add'`` / ``'del'`` values are stored as one-element numpy arrays.
    The fitness of an individual is ``exp(-Q)`` where ``Q`` is the modularity of
    the rewired graph under the partition given by the model's answers, so a
    lower modularity yields a higher fitness.
    """
    name = "QAttack"

    # Share of the population that is overwritten by copies of the fittest individuals.
    _ELITISM_RATE = 0.1

    def __init__(self, population_size, individual_size, generations, prob_cross, prob_mutate, **kwargs):
        """
        population_size - number of individuals kept in the population
        individual_size - amount of rewiring actions in one gene/individual
        generations - number of iterations of the genetic algorithm
        prob_cross - probability that a pair of individuals is crossed over
        prob_mutate - probability that a single rewiring action is mutated
        kwargs - forwarded unchanged to the parent constructor
        """
        super().__init__(**kwargs)
        self.population_size = population_size
        self.individual_size = individual_size
        self.generations = generations
        self.prob_cross = prob_cross
        self.prob_mutate = prob_mutate

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _node_id(value):
        """Return ``value`` (a scalar or a one-element array) as a plain ``int``."""
        # Going through reshape avoids calling int() on an array with ndim > 0.
        return int(np.asarray(value).reshape(-1)[0])

    @staticmethod
    def _endpoint_nodes(gen_dataset):
        """Return the set of node ids that occur in either row of ``edge_index``."""
        edge_index = gen_dataset.dataset.edge_index
        return set(edge_index[0].tolist()).union(edge_index[1].tolist())

    @classmethod
    def _apply_rewiring(cls, adj_list, rewiring):
        """Apply every ``add``/``del`` action of ``rewiring`` to ``adj_list`` in place."""
        for node, action in rewiring.items():
            neighbours = set(adj_list[node])
            # Same order as a union followed by a difference: 'del' wins over 'add'.
            neighbours.add(cls._node_id(action['add']))
            neighbours.discard(cls._node_id(action['del']))
            adj_list[node] = list(neighbours)

    def _sample_add(self, node, endpoint_nodes):
        """Pick a node that is neither ``node`` itself nor one of its original neighbours."""
        candidates = endpoint_nodes.difference(self.adj_list[node]).difference({node})
        return np.random.choice(list(candidates), size=1)

    def _sample_del(self, node):
        """Pick one of the original neighbours of ``node``."""
        return np.random.choice(list(self.adj_list[node]), size=1)

    def _rewirable_nodes(self):
        """Nodes of ``self.adj_list`` that have at least one neighbour to delete."""
        return [node for node, neighbours in self.adj_list.items() if len(neighbours) > 0]

    # ---------------------------------------------------------- genetic algorithm

    def init(self, gen_dataset):
        """
        Init first population:
        gen_dataset - graph-dataset

        Fills ``self.adj_list`` and ``self.population`` with
        ``self.population_size`` genes of ``self.individual_size`` actions each.
        """
        self.population = []
        self.adj_list = get_adj_list(gen_dataset)

        # Both collections do not depend on the individual, so build them once.
        endpoint_nodes = self._endpoint_nodes(gen_dataset)
        node_pool = self._rewirable_nodes()

        for _ in tqdm(range(self.population_size), desc='Init first population:'):
            chosen = np.random.choice(node_pool, size=self.individual_size, replace=False)
            gene = {}
            for node in chosen:
                node = int(node)
                gene[node] = {'add': self._sample_add(node, endpoint_nodes),
                              'del': self._sample_del(node)}
            self.population.append(gene)

    def fitness(self, model, gen_dataset):
        """
        Calculate fitness function with node classification

        Returns a list with one fitness value per individual of the population.
        """
        return [self.fitness_individual(model, gen_dataset, gene) for gene in self.population]

    def fitness_individual(self, model, gen_dataset, gene):
        """
        Calculate the fitness ``exp(-Q)`` of a single gene, where ``Q`` is the
        modularity of the rewired graph w.r.t. the labels answered by the model.
        """
        # Work on a copy so that the original dataset keeps its edges.
        dataset = copy.deepcopy(gen_dataset.dataset)
        adj_list = get_adj_list(dataset)
        self._apply_rewiring(adj_list, gene)
        dataset.edge_index = from_adj_list(adj_list)

        # Get labels from black-box
        labels = model.gnn.get_answer(dataset.x, dataset.edge_index).tolist()
        # FIXME check order for labels and node id consistency
        labeled_nodes = {node: labels[node - 1] for node in adj_list.keys()}

        # Calculate modularity
        return math.exp(-self.modularity(adj_list, labeled_nodes))

    @staticmethod
    def modularity(adj_list, labeled_nodes):
        """
        Calculation of graph modularity with specified node partition on communities

        adj_list - adjacency list, converted to a non-oriented one before use
        labeled_nodes - dict node -> community label
        Raises ValueError when the link count is zero.
        """
        # TODO implement oriented-modularity
        undirected = adj_list_oriented_to_non_oriented(adj_list)

        # Every link is expected to be listed from both of its endpoints.
        links = sum(len(neighbours) for neighbours in undirected.values()) // 2
        if links == 0:
            raise ValueError("A graph without link has an undefined modularity")

        inc = {}  # per community: weight of links with both endpoints inside it
        deg = {}  # per community: sum of node degrees
        for node, neighbours in undirected.items():
            com = labeled_nodes[node]
            deg[com] = deg.get(com, 0.) + len(neighbours)
            for neighbour in neighbours:
                if labeled_nodes[neighbour] != com:
                    continue
                edge_weight = 1.  # TODO weighted graph to be implemented
                # An ordinary link is visited from both endpoints, hence the half weight.
                share = edge_weight if neighbour == node else edge_weight / 2.
                inc[com] = inc.get(com, 0.) + share

        res = 0.
        for com in set(labeled_nodes.values()):
            res += (inc.get(com, 0.) / links) - (deg.get(com, 0.) / (2. * links)) ** 2
        return res

    def selection(self, model_manager, gen_dataset):
        """
        Replace the population by ``population_size`` individuals drawn with
        replacement, each with probability proportional to its fitness.
        """
        fit_scores = self.fitness(model_manager, gen_dataset)
        total = sum(fit_scores)
        probs = [score / total for score in fit_scores]
        picks = np.random.choice(self.population_size, size=self.population_size, replace=True, p=probs)
        # Deep copies keep repeated picks independent of each other.
        self.population = [copy.deepcopy(self.population[int(idx)]) for idx in picks]

    def crossover(self):
        """
        Cross over neighbouring pairs (0, 1), (2, 3), ... with probability
        ``prob_cross``; the children take the places of their parents.
        """
        for i in range(0, self.population_size - 1, 2):
            if np.random.random() <= self.prob_cross:
                self.population[i], self.population[i + 1] = self.gene_crossover(
                    self.population[i], self.population[i + 1])

    def gene_crossover(self, parent_1, parent_2):
        """
        Swap a random number of nodes that are unique to each parent.

        Returns a pair of children; the parents themselves are returned when
        there is nothing to swap. The same number of nodes moves in each
        direction, so gene sizes are preserved.
        """
        only_in_1 = [node for node in parent_1 if node not in parent_2]
        only_in_2 = [node for node in parent_2 if node not in parent_1]

        max_swap = min(len(only_in_1), len(only_in_2))
        if max_swap == 0:
            return parent_1, parent_2

        swap_count = np.random.randint(1, max_swap + 1)
        moved_to_2 = set(random.sample(only_in_1, swap_count))
        moved_to_1 = set(random.sample(only_in_2, swap_count))

        child_1 = {node: action for node, action in parent_1.items() if node not in moved_to_2}
        child_1.update((node, parent_2[node]) for node in moved_to_1)

        child_2 = {node: action for node, action in parent_2.items() if node not in moved_to_1}
        child_2.update((node, parent_1[node]) for node in moved_to_2)

        return child_1, child_2

    def mutation(self, gen_dataset):
        """
        Mutate every rewiring action with probability ``prob_mutate``.

        One of three mutations is chosen uniformly: re-draw the added edge,
        re-draw the deleted edge, or replace the node by another one that is
        not yet part of the gene. Candidates come from the original adjacency
        ``self.adj_list``, in the same way as in ``init``.
        """
        endpoint_nodes = self._endpoint_nodes(gen_dataset)
        rewirable = self._rewirable_nodes()

        for gene in self.population:
            # Iterate over a snapshot: node mutation changes the keys of the gene.
            for node in list(gene):
                if np.random.random() > self.prob_mutate:
                    continue
                mut_type = np.random.randint(3)
                if mut_type == 0:
                    # add mutation
                    gene[node]['add'] = self._sample_add(node, endpoint_nodes)
                elif mut_type == 1:
                    # del mutation
                    gene[node]['del'] = self._sample_del(node)
                else:
                    # node mutation
                    pool = [candidate for candidate in rewirable if candidate not in gene]
                    if not pool:
                        # Every rewirable node is already in the gene.
                        continue
                    new_node = int(np.random.choice(pool))
                    gene.pop(node)
                    gene[new_node] = {'add': self._sample_add(new_node, endpoint_nodes),
                                      'del': self._sample_del(new_node)}

    def elitism(self, model, gen_dataset):
        """
        Sort the population by ascending fitness, overwrite the worst
        individuals by copies of the best ones and return the fittest individual.
        """
        fit_scores = self.fitness(model, gen_dataset)
        order = sorted(range(len(fit_scores)), key=fit_scores.__getitem__)
        self.population = [self.population[i] for i in order]

        elite_count = int(self._ELITISM_RATE * self.population_size)
        # A zero count must be skipped: population[-0:] is the whole list.
        if elite_count > 0:
            # Deep copies, otherwise mutating an elite would also change its clone.
            self.population[:elite_count] = copy.deepcopy(self.population[-elite_count:])
        return self.population[-1]

    def attack(self, model_manager, gen_dataset, mask_tensor):
        """
        Run the genetic algorithm and write the best rewiring found into
        ``gen_dataset.dataset.data.edge_index``.

        model_manager - object whose ``gnn.get_answer`` is queried for labels
        gen_dataset - graph-dataset, modified in place and returned
        mask_tensor - not used by this method
        """
        self.init(gen_dataset)

        best_offspring = None
        for _ in tqdm(range(self.generations), desc='Attack iterations:', position=0, leave=True):
            self.selection(model_manager, gen_dataset)
            self.crossover()
            self.mutation(gen_dataset)
            best_offspring = self.elitism(model_manager, gen_dataset)

        if best_offspring is None:
            # No generation was run: fall back to the fittest initial individual.
            fit_scores = self.fitness(model_manager, gen_dataset)
            best_idx = max(range(len(fit_scores)), key=fit_scores.__getitem__)
            best_offspring = self.population[best_idx]

        adj_list = get_adj_list(gen_dataset)
        self._apply_rewiring(adj_list, best_offspring)

        gen_dataset.dataset.data.edge_index = from_adj_list(adj_list)
        return gen_dataset
0 commit comments