remake Nettack test for evasion
mishabounty committed Aug 14, 2024
1 parent 4df4669 commit 940f0eb
Showing 6 changed files with 167 additions and 185 deletions.
112 changes: 44 additions & 68 deletions experiments/attack_defense_test.py
@@ -175,26 +175,16 @@ def test_attack_defense():
print(metric_loc)


def test_nettack_attack():
def test_nettack_evasion():
my_device = device('cpu')

# Choose the node index
node_idx = 1900

# Load dataset
full_name = ("single-graph", "Planetoid", 'Cora')
dataset, data, results_dataset_path = DatasetManager.get_by_full_name(
full_name=full_name,
dataset_ver_ind=0
)

# Create mask
train_test_split_coeff = 0.7
train_mask = torch.rand(data.x.size(0)) < train_test_split_coeff # 70% True, 30% False
data.train_mask = train_mask
data.test_mask = ~ train_mask
data.val_mask = torch.zeros(data.x.size(0), dtype=torch.bool)

# Train the model on the original dataset and record its metric and the node's predicted probability
gcn_gcn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn')

@@ -209,99 +199,85 @@ def test_nettack_attack():
}
)

steps_epochs = 2000
gcn_gcn_model_manager = FrameworkGNNModelManager(
gnn_model_manager = FrameworkGNNModelManager(
gnn=gcn_gcn,
dataset_path=results_dataset_path,
manager_config=manager_config,
modification=ModelModificationConfig(model_ver_ind=0, epochs=0)
)

gcn_gcn_model_manager.gnn.to(my_device)
gnn_model_manager.gnn.to(my_device)

train_test_split_path = gcn_gcn_model_manager.train_model(gen_dataset=dataset,
steps=steps_epochs,
save_model_flag=False,
metrics=[Metric("F1", mask='train', average=None)])
num_steps = 200
gnn_model_manager.train_model(gen_dataset=dataset,
steps=num_steps,
save_model_flag=False)

# save train_test_mask to test the model on poisoned data with the same split
# dataset.save_train_test_mask(train_test_split_path)  # TODO: implement saving the train/test split
# Evaluate model
acc_train = gnn_model_manager.evaluate_model(gen_dataset=dataset,
metrics=[Metric("Accuracy", mask='train')])['train']['Accuracy']
acc_test = gnn_model_manager.evaluate_model(gen_dataset=dataset,
metrics=[Metric("Accuracy", mask='test')])['test']['Accuracy']
print(f"Accuracy on train: {acc_train}. Accuracy on test: {acc_test}")

metric_original_dataset = gcn_gcn_model_manager.evaluate_model(
gen_dataset=dataset,
metrics=[Metric("Accuracy", mask='test')])['test']
# Node for attack
node_idx = 0

gcn_gcn_model_manager.gnn.eval()
# Model prediction on a node before an evasion attack on it
gnn_model_manager.gnn.eval()
with torch.no_grad():
probabilities = torch.exp(gcn_gcn_model_manager.gnn(dataset.data.x, dataset.data.edge_index))
probabilities = torch.exp(gnn_model_manager.gnn(dataset.data.x, dataset.data.edge_index))

predicted_class = probabilities[node_idx].argmax().item()
predicted_probability = probabilities[node_idx][predicted_class].item()
real_class = dataset.data.y[node_idx].item()

original_dataset_predictions_info = {"metric_original_dataset": metric_original_dataset,
"node_idx": node_idx,
"predicted_class": predicted_class,
"predicted_probability": predicted_probability,
"real_class": real_class}
info_before_evasion_attack = {"node_idx": node_idx,
"predicted_class": predicted_class,
"predicted_probability": predicted_probability,
"real_class": real_class}

# Attack
nettack_poison_attack_config = ConfigPattern(
_class_name="NettackPoisonAttack",
_import_path=POISON_ATTACK_PARAMETERS_PATH,
_config_class="PoisonAttackConfig",
# Attack config
evasion_attack_config = ConfigPattern(
_class_name="NettackEvasionAttacker",
_import_path=EVASION_ATTACK_PARAMETERS_PATH,
_config_class="EvasionAttackConfig",
_config_kwargs={
"node_idx": node_idx,
"n_perturbations": 20,
"perturb_features": True,
"perturb_structure": True,
"hidden": gcn_gcn_model_manager.gnn.GCNConv_0.out_channels
"direct": True,
"n_influencers": 0
}
)
new_gcn_gcn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn')
new_gcn_gcn_model_manager = FrameworkGNNModelManager(
gnn=new_gcn_gcn,
dataset_path=results_dataset_path,
manager_config=manager_config,
modification=ModelModificationConfig(model_ver_ind=0, epochs=0)
)

new_gcn_gcn_model_manager.set_poison_attacker(poison_attack_config=nettack_poison_attack_config)
gnn_model_manager.set_evasion_attacker(evasion_attack_config=evasion_attack_config)

# TODO: implement saving the train/test split
# train_mask, val_mask, test_mask, train_test_sizes = torch.load(train_test_split_path / 'train_test_split')[:]
# dataset.train_mask, dataset.val_mask, dataset.test_mask = train_mask, val_mask, test_mask
# data.percent_train_class, data.percent_test_class = train_test_sizes

new_gcn_gcn_model_manager.train_model(gen_dataset=dataset,
steps=steps_epochs,
save_model_flag=False,
metrics=[Metric("F1", mask='train', average=None)])

metric_poison_dataset = new_gcn_gcn_model_manager.evaluate_model(
gen_dataset=new_gcn_gcn_model_manager.poison_attacker.attack_diff,
metrics=[Metric("Accuracy", mask='test')])['test']
# Attack
gnn_model_manager.evaluate_model(gen_dataset=dataset, metrics=[Metric("F1", mask='test', average='macro')])

new_gcn_gcn_model_manager.gnn.eval()
# Model prediction on a node after an evasion attack on it
with torch.no_grad():
probabilities = torch.exp(new_gcn_gcn_model_manager.gnn(new_gcn_gcn_model_manager.poison_attacker.attack_diff.data.x,
new_gcn_gcn_model_manager.poison_attacker.attack_diff.data.edge_index))
probabilities = torch.exp(gnn_model_manager.gnn(gnn_model_manager.evasion_attacker.attack_diff.data.x,
gnn_model_manager.evasion_attacker.attack_diff.data.edge_index))

predicted_class = probabilities[node_idx].argmax().item()
predicted_probability = probabilities[node_idx][predicted_class].item()
real_class = dataset.data.y[node_idx].item()

poisoned_dataset_predictions_info = {"metric_poison_dataset": metric_poison_dataset,
"node_idx": node_idx,
"predicted_class": predicted_class,
"predicted_probability": predicted_probability,
"real_class": real_class}
info_after_evasion_attack = {"node_idx": node_idx,
"predicted_class": predicted_class,
"predicted_probability": predicted_probability,
"real_class": real_class}

print(original_dataset_predictions_info)
print(poisoned_dataset_predictions_info)
print(f"info_before_evasion_attack: {info_before_evasion_attack}")
print(f"info_after_evasion_attack: {info_after_evasion_attack}")


if __name__ == '__main__':
# test_attack_defense()
test_nettack_attack()
test_nettack_evasion()



8 changes: 8 additions & 0 deletions metainfo/evasion_attack_parameters.json
@@ -3,6 +3,14 @@
},
"FGSM": {
"epsilon": ["epsilon", "float", 0.1, {"min": 0.0001, "step": 0.01}, "?"]
},
"NettackEvasionAttacker": {
"node_idx": ["node_idx", "int", 0, {"min": 0, "step": 1}, "Index of the node to attack"],
"n_perturbations": ["n_perturbations", "int", null, {"min": 0, "step": 1}, "Number of perturbations. If None, then n_perturbations = degree(node_idx)"],
"perturb_features": ["perturb_features", "bool", true, {}, "Indicates whether the features can be changed"],
"perturb_structure": ["perturb_structure", "bool", true, {}, "Indicates whether the structure can be changed"],
"direct": ["direct", "bool", true, {}, "Indicates whether to directly modify edges/features of the node attacked or only those of influencers"],
"n_influencers": ["n_influencers", "int", 0, {"min": 0, "step": 1}, "Number of influencing nodes. Will be ignored if direct is True"]
}
}
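
For reference, these defaults mirror the keyword arguments used in test_nettack_evasion above. A minimal sketch of building the attack config from them (illustrative only, reusing ConfigPattern, EVASION_ATTACK_PARAMETERS_PATH and gnn_model_manager from the test; not part of the commit):

evasion_attack_config = ConfigPattern(
    _class_name="NettackEvasionAttacker",
    _import_path=EVASION_ATTACK_PARAMETERS_PATH,
    _config_class="EvasionAttackConfig",
    _config_kwargs={
        "node_idx": 0,             # index of the node to attack
        "n_perturbations": None,   # None falls back to degree(node_idx) inside the attacker
        "perturb_features": True,
        "perturb_structure": True,
        "direct": True,
        "n_influencers": 0,        # ignored when direct is True
    }
)
gnn_model_manager.set_evasion_attacker(evasion_attack_config=evasion_attack_config)
# as in the test above, the attack itself fires during the subsequent evaluate_model call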

6 changes: 0 additions & 6 deletions metainfo/poison_attack_parameters.json
@@ -3,12 +3,6 @@
},
"RandomPoisonAttack": {
"n_edges_percent": ["n_edges_percent", "float", 0.1, {"min": 0.0001, "step": 0.01}, "?"]
},
"NettackPoisonAttack": {
"node_idx": ["node_idx", "int", 0, {"min": 0, "step": 1}, "Index of the node to attack"],
"perturb_features": ["perturb_features", "bool", true, {}, "Indicates whether the features can be changed"],
"perturb_structure": ["perturb_structure", "bool", true, {}, "Indicates whether the structure can be changed"],
"hidden": ["hidden", "int", 16, {"min": 1, "step": 1}, "Dimension of hidden layer"]
}

}
103 changes: 103 additions & 0 deletions src/attacks/evasion_attacks.py
@@ -1,8 +1,13 @@
import torch
import torch.nn.functional as F
import numpy as np

from attacks.attack_base import Attacker

# Nettack imports
from src.attacks.nettack.nettack import Nettack
from src.attacks.nettack.utils import preprocess_graph, largest_connected_components, data_to_csr_matrix, train_w1_w2


class EvasionAttacker(Attacker):
def __init__(self, **kwargs):
@@ -35,3 +40,101 @@ def attack(self, model_manager, gen_dataset, mask_tensor):
perturbed_data_x = torch.clamp(perturbed_data_x, 0, 1)
gen_dataset.data.x = perturbed_data_x.detach()
return gen_dataset


class NettackEvasionAttacker(EvasionAttacker):
name = "NettackEvasionAttacker"

def __init__(self,
node_idx=0,
n_perturbations=None,
perturb_features=True,
perturb_structure=True,
direct=True,
n_influencers=0
):

super().__init__()
self.attack_diff = None
self.node_idx = node_idx
self.n_perturbations = n_perturbations
self.perturb_features = perturb_features
self.perturb_structure = perturb_structure
self.direct = direct
self.n_influencers = n_influencers

def attack(self, model_manager, gen_dataset, mask_tensor):
# Prepare
data = gen_dataset.data
_A_obs, _X_obs, _z_obs = data_to_csr_matrix(data)
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
lcc = largest_connected_components(_A_obs)

_A_obs = _A_obs[lcc][:, lcc]

assert np.abs(_A_obs - _A_obs.T).sum() == 0, "Input graph is not symmetric"
assert _A_obs.max() == 1 and len(np.unique(_A_obs[_A_obs.nonzero()].A1)) == 1, "Graph must be unweighted"
assert _A_obs.sum(0).A1.min() > 0, "Graph contains singleton nodes"

_X_obs = _X_obs[lcc].astype('float32')
_z_obs = _z_obs[lcc]
_N = _A_obs.shape[0]
_K = _z_obs.max() + 1
_Z_obs = np.eye(_K)[_z_obs]
_An = preprocess_graph(_A_obs)
degrees = _A_obs.sum(0).A1

if self.n_perturbations is None:
self.n_perturbations = int(degrees[self.node_idx])
hidden = model_manager.gnn.GCNConv_0.out_channels
# End prepare

# Learn the weight matrices W1 and W2
W1, W2 = train_w1_w2(dataset=gen_dataset, hidden=hidden)

# Attack
nettack = Nettack(_A_obs, _X_obs, _z_obs, W1, W2, self.node_idx, verbose=True)

nettack.reset()
nettack.attack_surrogate(n_perturbations=self.n_perturbations,
perturb_structure=self.perturb_structure,
perturb_features=self.perturb_features,
direct=self.direct,
n_influencers=self.n_influencers)

print(f'edges: {nettack.structure_perturbations}')
print(f'features: {nettack.feature_perturbations}')

self._evasion(gen_dataset, nettack.feature_perturbations, nettack.structure_perturbations)
self.attack_diff = gen_dataset

return gen_dataset

def attack_diff(self):
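# NOTE: the instance attribute attack_diff assigned in __init__ and attack() shadows this method when accessed on an instance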
return self.attack_diff

@staticmethod
def _evasion(gen_dataset, feature_perturbations, structure_perturbations):
cleaned_feat_pert = list(filter(None, feature_perturbations))
if cleaned_feat_pert: # list is not empty
x = gen_dataset.data.x.clone()
for vertex, feature in cleaned_feat_pert:
if x[vertex, feature] == 0.0:
x[vertex, feature] = 1.0
elif x[vertex, feature] == 1.0:
x[vertex, feature] = 0.0
gen_dataset.data.x = x

cleaned_struct_pert = list(filter(None, structure_perturbations))
if cleaned_struct_pert: # list is not empty
edge_index = gen_dataset.data.edge_index.clone()
# add each perturbed edge in both directions to keep edge_index symmetric
for edge in cleaned_struct_pert:
edge_index = torch.cat((edge_index,
torch.tensor((edge[0], edge[1]), dtype=torch.int32).to(torch.int64).unsqueeze(1)), dim=1)
edge_index = torch.cat((edge_index,
torch.tensor((edge[1], edge[0]), dtype=torch.int32).to(torch.int64).unsqueeze(1)), dim=1)

gen_dataset.data.edge_index = edge_index
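
As a side note, the perturbation formats consumed by _evasion can be read off the loops above: feature perturbations are (vertex, feature) pairs whose binary attribute gets flipped, and structure perturbations are (u, v) pairs appended to edge_index in both directions. A tiny self-contained sketch with hypothetical toy tensors (not part of the commit):

import torch

x = torch.zeros((3, 4))                                          # toy binary feature matrix
edge_index = torch.tensor([[0, 1], [1, 0]], dtype=torch.int64)   # toy graph: one undirected edge

feature_perturbations = [(0, 2), None]    # None entries are filtered out, as in _evasion
structure_perturbations = [(0, 2)]

for vertex, feature in filter(None, feature_perturbations):
    x[vertex, feature] = 1.0 - x[vertex, feature]                # flip 0 <-> 1

for u, v in filter(None, structure_perturbations):
    both_directions = torch.tensor([[u, v], [v, u]], dtype=torch.int64).t()
    edge_index = torch.cat((edge_index, both_directions), dim=1)

print(x[0])         # tensor([0., 0., 1., 0.])
print(edge_index)   # tensor([[0, 1, 0, 2], [1, 0, 2, 0]])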

16 changes: 8 additions & 8 deletions src/attacks/nettack/utils.py
@@ -13,12 +13,12 @@ def __init__(self, num_features, hidden, num_classes):
super(GNNLinear, self).__init__()

# Initialize the layers
self.conv1 = GCNConv(num_features, hidden, add_self_loops=False, bias=False)
self.conv2 = GCNConv(hidden, num_classes, add_self_loops=False, bias=False)
self.conv0 = GCNConv(num_features, hidden, add_self_loops=False, bias=False)
self.conv1 = GCNConv(hidden, num_classes, add_self_loops=False, bias=False)

def forward(self, x=None, edge_index=None, **kwargs):
x = self.conv0(x, edge_index)
x = self.conv1(x, edge_index)
x = self.conv2(x, edge_index)
x = F.log_softmax(x, dim=1)
return x

@@ -84,16 +84,16 @@ def data_to_csr_matrix(data):
return adj_matrix, attr_matrix, labels


def learn_w1_w2(dataset, hidden):
def train_w1_w2(dataset, hidden):
data = dataset.data
# TODO: pass the hidden parameter

model_gnn_lin = GNNLinear(dataset.num_node_features, hidden, dataset.num_classes)

optimizer = torch.optim.Adam(model_gnn_lin.parameters(),
lr=0.001,
weight_decay=5e-4)

num_epochs = 2000
num_epochs = 1000
print("Train surrogate model")
for epoch in tqdm(range(num_epochs)):
model_gnn_lin.train()
@@ -104,8 +104,8 @@ def learn_w1_w2(dataset, hidden):
optimizer.step()
print("End training")

W1 = model_gnn_lin.conv1.lin.weight.T
W2 = model_gnn_lin.conv2.lin.weight.T
W1 = model_gnn_lin.conv0.lin.weight.T
W2 = model_gnn_lin.conv1.lin.weight.T
return W1, W2

