Skip to content

Commit afa3b24

Browse files
Merge pull request #21 from ispras/Q-attack
Q attack
2 parents 704a10a + 220dc27 commit afa3b24

File tree

5 files changed

+415
-3
lines changed

5 files changed

+415
-3
lines changed

experiments/attack_defense_test.py

+109-1
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,116 @@ def test_nettack_evasion():
343343
print(f"info_before_evasion_attack: {info_before_evasion_attack}")
344344
print(f"info_after_evasion_attack: {info_after_evasion_attack}")
345345

346+
def test_qattack():
    """
    Experiment: train a GCN on KarateClub, attach the QAttack evasion attacker
    to the model manager and print the accuracy measured on the test mask.

    The explicit attack call and the post-attack re-evaluation are kept below
    as disabled code, exactly as in the work-in-progress version.
    """
    # NOTE(review): `qattack` is not referenced below; presumably the import is
    # needed so that the attacker class gets loaded - confirm.
    from attacks.QAttack import qattack

    my_device = device('cpu')

    # Load dataset (Cora is kept as a disabled alternative)
    # full_name = ("single-graph", "Planetoid", 'Cora')
    full_name = ('single-graph', 'pytorch-geometric-other', 'KarateClub')
    dataset, data, results_dataset_path = DatasetManager.get_by_full_name(
        full_name=full_name,
        dataset_ver_ind=0,
    )

    # Model and its manager
    gcn_gcn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn')

    optimizer_settings = {
        "_class_name": "Adam",
        "_config_kwargs": {},
    }
    manager_config = ConfigPattern(
        _config_class="ModelManagerConfig",
        _config_kwargs={
            "mask_features": [],
            "optimizer": optimizer_settings,
        },
    )

    gnn_model_manager = FrameworkGNNModelManager(
        gnn=gcn_gcn,
        dataset_path=results_dataset_path,
        manager_config=manager_config,
        modification=ModelModificationConfig(model_ver_ind=0, epochs=0),
    )
    gnn_model_manager.gnn.to(my_device)

    # Train on the original (non-attacked) dataset
    num_steps = 100
    gnn_model_manager.train_model(
        gen_dataset=dataset,
        steps=num_steps,
        save_model_flag=False,
    )

    # Attack config: QAttack with no explicit keyword overrides
    evasion_attack_config = ConfigPattern(
        _class_name="QAttack",
        _import_path=EVASION_ATTACK_PARAMETERS_PATH,
        _config_class="EvasionAttackConfig",
        _config_kwargs={},
    )
    gnn_model_manager.set_evasion_attacker(evasion_attack_config=evasion_attack_config)

    # Evaluate model
    test_accuracy_metric = Metric("Accuracy", mask='test')
    evaluation = gnn_model_manager.evaluate_model(
        gen_dataset=dataset,
        metrics=[test_accuracy_metric],
    )
    acc_test = evaluation['test']['Accuracy']
    print(f"Accuracy on test: {acc_test}")

    # Disabled: explicit attack call followed by a re-evaluation
    # dataset = gnn_model_manager.evasion_attacker.attack(gnn_model_manager, dataset, None)
    # acc_test = gnn_model_manager.evaluate_model(gen_dataset=dataset,
    #                                             metrics=[Metric("Accuracy", mask='test')])['test']['Accuracy']
    # print(f"Accuracy on test after attack: {acc_test}")

    # TODO: the per-node probes (predicted class / probability of one node
    # before and after the attack, printed as info_before_evasion_attack and
    # info_after_evasion_attack) are disabled in this experiment.
346453

347454
if __name__ == '__main__':
    # Entry point of the experiment script: seed torch, then run exactly one
    # scenario; the other scenarios stay listed as disabled calls.
    #test_attack_defense()
    torch.manual_seed(5000)
    #test_meta()
    test_qattack()

metainfo/evasion_attack_parameters.json

+8-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
"perturb_structure": ["perturb_structure", "bool", true, {}, "Indicates whether the structure can be changed"],
1212
"direct": ["direct", "bool", true, {}, "Indicates whether to directly modify edges/features of the node attacked or only those of influencers"],
1313
"n_influencers": ["n_influencers", "int", 0, {"min": 0, "step": 1}, "Number of influencing nodes. Will be ignored if direct is True"]
14-
}
14+
},
15+
"QAttack": {
16+
"population_size": ["Population size", "int", 50, {"min": 1, "step": 1}, "Number of genes in population"],
17+
"individual_size": ["Individual size", "int", 30, {"min": 1, "step": 1}, "Number of rewiring operations within one gene"],
18+
"generations" : ["Generations", "int", 50, {"min": 0, "step": 1}, "Number of generations for genetic algorithm"],
19+
"prob_cross": ["Probability for crossover", "float", 0.5, {"min": 0, "max": 1, "step": 0.01}, "Probability of crossover between two genes"],
20+
"prob_mutate": ["Probability for mutation", "float", 0.02, {"min": 0, "max": 1, "step": 0.01}, "Probability of gene mutation"]
21+
}
1522
}
1623

src/attacks/QAttack/qattack.py

+239
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
import copy
2+
import math
3+
import numpy as np
4+
import random
5+
6+
from tqdm import tqdm
7+
from attacks.evasion_attacks import EvasionAttacker
8+
from attacks.QAttack.utils import get_adj_list, from_adj_list, adj_list_oriented_to_non_oriented
9+
10+
class QAttacker(EvasionAttacker):
11+
name = "QAttack"
12+
13+
def __init__(self, population_size, individual_size, generations, prob_cross, prob_mutate, **kwargs):
    """
    Store the genetic algorithm settings:
    population_size - number of individuals kept in the population
    individual_size - number of rewired nodes within one individual
    generations - number of iterations of the genetic algorithm
    prob_cross - probability of crossover for a pair of individuals
    prob_mutate - probability of mutation for one rewired node
    kwargs - forwarded unchanged to the parent class constructor
    """
    super().__init__(**kwargs)
    self.population_size, self.individual_size = population_size, individual_size
    self.generations = generations
    self.prob_cross, self.prob_mutate = prob_cross, prob_mutate
20+
21+
def init(self, gen_dataset):
    """
    Init first population:
    gen_dataset - graph-dataset

    Fills self.adj_list (result of get_adj_list) and self.population.
    Every individual is a dict {node: {'add': array([u]), 'del': array([v])}}
    with self.individual_size distinct nodes, where u is drawn from the
    non-isolated nodes that are neither `node` nor its neighbours, and v is
    drawn from the neighbours of `node`.

    Raises ValueError when individual_size exceeds the number of nodes
    available in the adjacency list.
    """
    self.population = []
    self.adj_list = get_adj_list(gen_dataset)

    # Both collections do not depend on the individual being built, so they
    # are computed once instead of once per individual.
    edge_index = gen_dataset.dataset.edge_index
    non_isolated_nodes = set(edge_index[0].tolist()).union(edge_index[1].tolist())
    candidate_nodes = list(self.adj_list.keys())

    if self.individual_size > len(candidate_nodes):
        raise ValueError(
            f"individual_size ({self.individual_size}) exceeds the number of "
            f"nodes available for rewiring ({len(candidate_nodes)})")

    for _ in tqdm(range(self.population_size), desc='Init first population:'):
        selected_nodes = np.random.choice(candidate_nodes, size=self.individual_size, replace=False)
        gene = {}
        for n in selected_nodes:
            # A node can be linked neither to itself nor to an existing neighbour
            connected_nodes = set(self.adj_list[n])
            connected_nodes.add(n)
            addition_nodes = non_isolated_nodes.difference(connected_nodes)
            gene[n] = {'add': np.random.choice(list(addition_nodes), size=1),
                       'del': np.random.choice(list(self.adj_list[n]), size=1)}
        self.population.append(gene)
44+
45+
def fitness(self, model, gen_dataset):
    """
    Calculate fitness function with node classification

    For each of the first self.population_size individuals the rewiring is
    applied to a copy of gen_dataset.dataset, the model is queried for labels
    and the score 1 / exp(Q) is stored, where Q is the modularity of the
    rewired graph partitioned by these labels (lower modularity gives a
    higher score). Returns the list of scores in population order.
    """
    fit_scores = []
    for i in range(self.population_size):
        # Get rewired dataset (the copy keeps the source dataset untouched)
        dataset = copy.deepcopy(gen_dataset.dataset)
        adj_list = get_adj_list(dataset)
        for n, operations in self.population[i].items():
            added = int(operations['add'])
            removed = int(operations['del'])
            adj_list[n] = list(set(adj_list[n]).union({added}).difference({removed}))
        dataset.edge_index = from_adj_list(adj_list)

        # Get labels from black-box; converted to a list once rather than
        # once per node
        labels = model.gnn.get_answer(dataset.x, dataset.edge_index)
        label_values = labels.tolist()
        # FIXME check order for labels and node id consistency
        labeled_nodes = {n: label_values[n - 1] for n in adj_list.keys()}

        # Calculate modularity
        Q = self.modularity(adj_list, labeled_nodes)
        fit_scores.append(1 / math.exp(Q))
    return fit_scores
68+
69+
def fitness_individual(self, model, gen_dataset, gene):
    """
    Calculate the fitness score of a single individual:
    model - object whose gnn.get_answer(x, edge_index) returns node labels
    gen_dataset - graph-dataset, only a copy of gen_dataset.dataset is modified
    gene - rewiring dict {node: {'add': nodes, 'del': nodes}}

    Returns 1 / exp(Q), where Q is the modularity of the rewired graph
    partitioned by the labels returned by the model.
    """
    dataset = copy.deepcopy(gen_dataset.dataset)
    adj_list = get_adj_list(dataset)
    for n, operations in gene.items():
        adj_list[n] = list(
            set(adj_list[n]).union(set(operations['add'])).difference(set(operations['del'])))
    dataset.edge_index = from_adj_list(adj_list)

    # Get labels from black-box; converted to a list once rather than once
    # per node
    labels = model.gnn.get_answer(dataset.x, dataset.edge_index)
    label_values = labels.tolist()
    # FIXME check order for labels and node id consistency
    labeled_nodes = {n: label_values[n - 1] for n in adj_list.keys()}

    # Calculate modularity
    Q = self.modularity(adj_list, labeled_nodes)
    return 1 / math.exp(Q)
84+
85+
@staticmethod
86+
def modularity(adj_list, labeled_nodes):
87+
"""
88+
Calculation of graph modularity with specified node partition on communities
89+
"""
90+
# TODO implement oriented-modularity
91+
92+
inc = dict([])
93+
deg = dict([])
94+
95+
links = 0
96+
non_oriented_adj_list = adj_list_oriented_to_non_oriented(adj_list)
97+
for k, v in non_oriented_adj_list.items():
98+
links += len(v)
99+
if links == 0:
100+
raise ValueError("A graph without link has an undefined modularity")
101+
links //= 2
102+
103+
for node, edges in non_oriented_adj_list.items():
104+
com = labeled_nodes[node]
105+
deg[com] = deg.get(com, 0.) + len(non_oriented_adj_list[node])
106+
for neighbor in edges:
107+
edge_weight = 1 # TODO weighted graph to be implemented
108+
if labeled_nodes[neighbor] == com:
109+
if neighbor == node:
110+
inc[com] = inc.get(com, 0.) + float(edge_weight)
111+
else:
112+
inc[com] = inc.get(com, 0.) + float(edge_weight) / 2.
113+
114+
res = 0.
115+
for com in set(labeled_nodes.values()):
116+
res += (inc.get(com, 0.) / links) - \
117+
(deg.get(com, 0.) / (2. * links)) ** 2
118+
return res
119+
120+
def selection(self, model_manager, gen_dataset):
    """
    Fitness-proportional selection:
    draws self.population_size individuals (with repetition) with probability
    proportional to their fitness score and replaces self.population with
    independent deep copies of the drawn individuals.
    """
    fit_scores = self.fitness(model_manager, gen_dataset)
    # The normalising constant is computed once, not once per individual
    total_score = sum(fit_scores)
    probs = [score / total_score for score in fit_scores]

    selected_population = []
    for _ in range(self.population_size):
        chosen = np.random.choice(self.population_size, 1, False, probs)[0]
        # Deep copy: the same individual may be drawn several times and the
        # copies must be able to change independently afterwards
        selected_population.append(copy.deepcopy(self.population[chosen]))
    self.population = selected_population
128+
129+
def crossover(self):
    """
    Crossover of neighbouring individuals:
    the population is walked in pairs (0, 1), (2, 3), ...; with probability
    self.prob_cross a pair is replaced in place by the two children returned
    by gene_crossover, otherwise the pair is left as it is. With an odd
    population size the last individual has no partner and stays unchanged.
    """
    # Parents are read from and children are written to the same positions;
    # reading at (i, i + 1) while writing at (2i, 2i + 1) skipped most pairs
    # and overwrote individuals that had not been used as parents yet.
    for i in range(0, self.population_size - 1, 2):
        crossover_prob = np.random.random()
        if crossover_prob <= self.prob_cross:
            self.population[i], self.population[i + 1] = self.gene_crossover(
                self.population[i], self.population[i + 1])
139+
140+
def gene_crossover(self, parent_1, parent_2):
    """
    Crossover of two individuals:
    parent_1, parent_2 - rewiring dicts {node: {'add': ..., 'del': ...}}

    A random number (at least one) of nodes present in only one parent is
    exchanged with the same number of nodes present only in the other parent,
    so each child keeps the size of its parent. Nodes shared by both parents
    are never exchanged. When one of the parents has no node of its own,
    nothing can be exchanged and the children are plain copies of the parents.

    Returns the pair (child_1, child_2). The children own copies of the
    per-node dicts, so changing a child never changes a parent.
    """
    keys_1 = set(parent_1.keys())
    keys_2 = set(parent_2.keys())

    only_in_1 = list(keys_1.difference(keys_2))
    only_in_2 = list(keys_2.difference(keys_1))

    # Equal-sized parents always have equally many own nodes; taking the
    # minimum also keeps the exchange balanced for parents of different size.
    max_swaps = min(len(only_in_1), len(only_in_2))
    if max_swaps == 0:
        return ({node: dict(ops) for node, ops in parent_1.items()},
                {node: dict(ops) for node, ops in parent_2.items()})

    swap_count = np.random.randint(1, max_swaps + 1)
    moved_from_1 = random.sample(only_in_1, swap_count)
    moved_from_2 = random.sample(only_in_2, swap_count)

    child_1_nodes = keys_1.difference(moved_from_1).union(moved_from_2)
    child_2_nodes = keys_2.difference(moved_from_2).union(moved_from_1)

    # Every node of a child comes from its own parent unless it was received
    # from the other one
    child_1 = {node: dict(parent_1[node] if node in parent_1 else parent_2[node])
               for node in child_1_nodes}
    child_2 = {node: dict(parent_2[node] if node in parent_2 else parent_1[node])
               for node in child_2_nodes}

    return child_1, child_2
177+
178+
def mutation(self, gen_dataset):
    """
    Mutation of the population:
    every rewired node of every individual mutates with probability
    self.prob_mutate; one of three equally likely mutations is applied:
    0 - a new 'add' target is drawn for the node,
    1 - a new 'del' target is drawn from the rewired neighbours of the node,
    2 - the node is replaced by a node that is not part of the individual yet.
    """
    # Built on the first mutation only and used read-only afterwards.
    # NOTE(review): assumes get_adj_list returns the same result for every
    # copy of gen_dataset.dataset - confirm.
    base_adj_list = None
    non_isolated_nodes = None

    for i in range(self.population_size):
        individual = self.population[i]
        # Snapshot of the keys: mutation 2 changes the dict while iterating
        for node in list(individual.keys()):
            mutation_prob = np.random.random()
            if mutation_prob > self.prob_mutate:
                continue
            mut_type = np.random.randint(3)

            if base_adj_list is None:
                base_adj_list = get_adj_list(copy.deepcopy(gen_dataset.dataset))
                edge_index = gen_dataset.dataset.edge_index
                non_isolated_nodes = set(edge_index[0].tolist()).union(edge_index[1].tolist())

            if mut_type == 0:
                # add mutation
                connected_nodes = set(self.adj_list[node])
                connected_nodes.add(node)
                addition_nodes = non_isolated_nodes.difference(connected_nodes)
                individual[node]['add'] = np.random.choice(list(addition_nodes), 1)
            elif mut_type == 1:
                # del mutation: neighbours of the mutated node itself after
                # the current rewiring of that node has been applied
                rewired_neighbors = set(base_adj_list[node]).union(
                    {int(individual[node]['add'])}).difference({int(individual[node]['del'])})
                individual[node]['del'] = np.random.choice(list(rewired_neighbors), 1)
            else:
                # replace mutation
                selected_nodes = set(individual.keys())
                non_selected_nodes = non_isolated_nodes.difference(selected_nodes)
                new_node = np.random.choice(list(non_selected_nodes), size=1, replace=False)[0]
                individual.pop(node)
                # As for every other rewiring, the node itself and its
                # neighbours are not valid 'add' targets
                connected_nodes = set(self.adj_list[new_node])
                connected_nodes.add(new_node)
                addition_nodes = non_isolated_nodes.difference(connected_nodes)
                # new_node is not rewired by this individual yet, so its
                # neighbours are taken from the unmodified adjacency list
                individual[new_node] = {
                    'add': np.random.choice(list(addition_nodes), 1),
                    'del': np.random.choice(list(base_adj_list[new_node]), 1),
                }
212+
213+
def elitism(self, model, gen_dataset):
    """
    Elitism step:
    sorts self.population by fitness score in ascending order and replaces
    the worst int(0.1 * population_size) individuals with independent copies
    of the best ones. Nothing is replaced when that number is zero.

    Returns the individual with the highest fitness score.
    """
    fit_scores = self.fitness(model, gen_dataset)
    # Stable ascending order of indices: the fittest individual comes last
    sort_order = sorted(range(len(fit_scores)), key=fit_scores.__getitem__)
    self.population = [self.population[i] for i in sort_order]

    elitism_size = int(0.1 * self.population_size)
    # Guard: with elitism_size == 0 the slice [-0:] is the whole population,
    # so assigning it to [:0] would prepend a copy of every individual
    if elitism_size > 0:
        self.population[:elitism_size] = copy.deepcopy(self.population[-elitism_size:])
    return self.population[-1]
221+
222+
223+
def attack(self, model_manager, gen_dataset, mask_tensor):
    """
    Run the genetic algorithm and apply the best rewiring to the dataset:
    model_manager - passed to the fitness calculation as the black-box model
    gen_dataset - graph-dataset, its edge_index is replaced by the rewired one
    mask_tensor - not used by this attack

    Every generation runs selection, crossover, mutation and elitism. With
    zero generations the fittest individual of the initial population is used.
    Returns the modified gen_dataset.
    """
    self.init(gen_dataset)

    best_offspring = None
    for _ in tqdm(range(self.generations), desc='Attack iterations:', position=0, leave=True):
        self.selection(model_manager, gen_dataset)
        self.crossover()
        self.mutation(gen_dataset)
        best_offspring = self.elitism(model_manager, gen_dataset)

    if best_offspring is None:
        # No generation was run, so the loop never produced a result
        best_offspring = self.elitism(model_manager, gen_dataset)

    # NOTE(review): the adjacency list is read from gen_dataset and the result
    # is written to gen_dataset.dataset.data, while the other methods work
    # with gen_dataset.dataset directly - confirm that both refer to the same
    # graph.
    adj_list = get_adj_list(gen_dataset)
    for n, operations in best_offspring.items():
        added = int(operations['add'])
        removed = int(operations['del'])
        adj_list[n] = list(set(adj_list[n]).union({added}).difference({removed}))

    gen_dataset.dataset.data.edge_index = from_adj_list(adj_list)
    return gen_dataset

0 commit comments

Comments
 (0)