# training.py - training and evaluation entry point for the HeteroGNN model.
# coding=utf-8
from torch_geometric.nn import HeteroConv, GCNConv, SAGEConv, GATConv, Linear, HGTConv
from torch_geometric.loader import DataLoader
from utils.models import HeteroGNN
from typing import Optional
from tqdm import tqdm
import torch_geometric.transforms as T
import torch.nn.functional as F
import utils.config as config
import networkx as nx
import numpy as np
import torchmetrics
import torch
import os
# ---------------------------------------------------------------------
def load_dataset_from_folder(folder_path, batch_size=24, device=None):
    """Load every serialized graph list in ``folder_path`` into a shuffled loader.

    Each entry of the folder is read with ``torch.load`` and the results are
    concatenated, so every file is expected to contain a list of graph
    objects. Each graph is passed through ``T.ToUndirected()`` and moved to
    ``device`` before the loader is built.

    Args:
        folder_path: Directory whose entries are all read with ``torch.load``.
        batch_size: Batch size handed to the ``DataLoader``.
        device: Target device (``torch.device`` or string). When ``None``,
            ``cuda:0`` is used if CUDA is available, otherwise the CPU.

    Returns:
        A ``DataLoader`` over the prepared graphs with ``shuffle=True``.
    """
    if device is None:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)
    # Pinned memory and non-blocking copies only make sense for CUDA targets;
    # requesting them on a CPU-only host fails.
    use_cuda = device.type == 'cuda'

    data_list = []
    # Sorted so the load order does not depend on the filesystem.
    pt_files = sorted(os.listdir(folder_path))
    for filename in tqdm(pt_files, desc='[-] Reading ptfiles', unit_scale=False):
        pt_path = os.path.join(folder_path, filename)
        # NOTE(review): torch.load unpickles arbitrary objects - only point
        # this at trusted files.
        data_list += torch.load(pt_path)

    to_undirected = T.ToUndirected()
    prepared = []
    for data in data_list:
        # Keep the *returned* objects: rebinding the loop variable alone only
        # works when every call happens to mutate its argument in place.
        data = to_undirected(data)
        if use_cuda:
            data = data.pin_memory()
        data = data.to(device, non_blocking=use_cuda)
        prepared.append(data)

    return DataLoader(prepared, batch_size=batch_size, shuffle=True)
# ---------------------------------------------------------------------
def train(loader, epoch, num_epochs):
    """Run one optimisation pass over ``loader``.

    Uses the module-level ``model``, ``optimizer`` and ``criterion`` that the
    ``__main__`` section creates.

    Args:
        loader: Sized iterable of batches. Each batch is passed to ``model``
            and its labels are read from ``batch['y']``.
        epoch: Current epoch index; only used in the progress-bar label.
        num_epochs: Total number of epochs; only used in the progress-bar
            label.

    Returns:
        The mean batch loss of this pass as a float (``0.0`` for an empty
        loader).
    """
    model.train()
    running_acc = torchmetrics.Accuracy(task="binary", average='none')
    total_loss = 0.0
    num_batches = 0

    progress = tqdm(loader, total=len(loader))
    # The label is constant for the whole pass, so set it once.
    progress.set_description(f'Epoch [{epoch}/{num_epochs}]')
    for data in progress:
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data['y'])
        loss.backward()
        optimizer.step()

        # Accumulate plain floats: summing the loss tensors themselves would
        # keep every batch's autograd graph alive until the epoch ends.
        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1

        # argmax over the logits picks the same class as argmax over softmax.
        pred = out.detach().argmax(dim=1)
        running_acc(pred.cpu(), data['y'].cpu())
        progress.set_postfix(loss=batch_loss, acc=running_acc.compute().item())

    return total_loss / num_batches if num_batches else 0.0
def eval(loader, j):
    """Evaluate the module-level ``model`` on ``loader`` and collect metrics.

    The name shadows the ``eval`` builtin; it is kept because callers in this
    file use it.

    Args:
        loader: Iterable of batches. Each batch is passed to ``model`` and its
            labels are read from ``batch['y']``.
        j: Label used as the prefix of the printed accuracy line.

    Returns:
        A list ``[(TP, TN, FP, FN), auroc, accuracy, recall, precision, f1,
        (fpr, tpr, thresholds)]`` where the scalar metrics are Python floats
        and the last entry is the output of ``BinaryROC.compute()``.
    """
    acc = torchmetrics.Accuracy(task="binary", average='none')
    recall = torchmetrics.Recall(task="binary", average='none')
    precision = torchmetrics.Precision(task="binary", average='none')
    auc = torchmetrics.AUROC(task="multiclass", average="macro", num_classes=2)
    f1 = torchmetrics.F1Score(task="binary")
    roc = torchmetrics.classification.BinaryROC()

    model.eval()
    TP, TN, FP, FN = 0, 0, 0, 0
    with torch.no_grad():
        for data in loader:
            y = data['y'].cpu()
            out = model(data).cpu()
            # Compute the class probabilities once and reuse them for the
            # hard predictions, AUROC and the ROC curve.
            probs = F.softmax(out, dim=1)
            pred = probs.argmax(dim=1).float()

            # Confusion-matrix counts, with class 1 as the positive class.
            correct = pred == y
            predicted_pos = pred == 1
            TP += int((correct & predicted_pos).sum())
            TN += int((correct & ~predicted_pos).sum())
            FP += int((~correct & predicted_pos).sum())
            FN += int((~correct & ~predicted_pos).sum())

            # Only the accumulated state is needed, so update the metrics
            # without computing per-batch values.
            auc.update(probs, y)
            acc.update(pred, y)
            recall.update(pred, y)
            precision.update(pred, y)
            f1.update(pred, y)
            roc.update(probs[:, 1], y)

    fpr, tpr, thresholds = roc.compute()
    print('[*] {}_acc: {}'.format(j, acc.compute().item()))
    return [(TP, TN, FP, FN),
            auc.compute().item(),
            acc.compute().item(),
            recall.compute().item(),
            precision.compute().item(),
            f1.compute().item(),
            (fpr, tpr, thresholds)]
# ---------------------------------------------------------------------
def _metrics_history_to_dict(history):
    """Regroup a list of per-epoch metric records into a dict keyed by metric."""
    return {
        'TP_TN_FP_FN': [record[0] for record in history],
        'AUROC': [record[1] for record in history],
        'ACCURACY': [record[2] for record in history],
        'RECALL': [record[3] for record in history],
        'PRECISION': [record[4] for record in history],
        'F1': [record[5] for record in history],
        # Only the ROC entry of the last record is kept; an empty history
        # has none.
        'ROC': history[-1][6] if history else None,
    }


def save_metrics(name, train_metrics, test_metrics):
    """Write the train/test metric histories to ``./save/trace/<name>_metrics.npy``.

    Each history is a list of records indexed as
    ``[confusion_counts, auroc, accuracy, recall, precision, f1, roc]``.
    The saved object is a dict with the keys ``'train_metrics'`` and
    ``'test_metrics'``; because it is a dict, NumPy stores it as a pickled
    object array, so it has to be read back with
    ``np.load(path, allow_pickle=True).item()``.

    Args:
        name: Prefix of the output file name.
        train_metrics: History of records for the training set.
        test_metrics: History of records for the test set.

    Returns:
        ``None`` (the return value of ``np.save``).
    """
    # np.save does not create missing parent directories.
    os.makedirs('./save/trace', exist_ok=True)
    dic = {
        'train_metrics': _metrics_history_to_dict(train_metrics),
        'test_metrics': _metrics_history_to_dict(test_metrics),
    }
    return np.save('./save/trace/{}_metrics.npy'.format(name), dic)
# ---------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: parse the command-line options, build the model and
    # data loaders, then either train for EPOCHS epochs or evaluate once.
    # NOTE(review): `config.__init__` is presumably a project function that
    # sets up `config.parser` - confirm in utils.config.
    config.__init__()
    args = config.parser.parse_args()
    TRAIN = args.train
    EPOCHS = args.epochs
    BATCH = args.batch  # must be set to 1 for the S1-4 experiments
    outfile_name = args.outname
    hgnn = args.hgnn
    hc = args.hc
    nl = args.nl
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # `model`, `optimizer` and `criterion` are module-level names on purpose:
    # train() and eval() read them as globals.
    model = HeteroGNN(hgnn, hidden_channels=hc, num_layers=nl).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-3, weight_decay=1e-4)
    criterion = torch.nn.CrossEntropyLoss()
    train_dataset = load_dataset_from_folder('./save/data/train', batch_size=BATCH)
    test_dataset = load_dataset_from_folder('./save/data/test', batch_size=BATCH)
    # Debug helpers, left disabled:
    # input(test_dataset.dataset[3])
    # for d in test_dataset:
    #     input(d)
    train_metrics = []
    test_metrics = []
    best_score = 0
    best_metrics = None
    # NOTE: the best-score selection below needs adjusting - a score of 1.0
    # is reached many times, which led to the best result not being updated.
    if TRAIN:
        for epoch in range(EPOCHS):
            train(train_dataset, epoch, EPOCHS)
            train_metrics.append(eval(train_dataset, 'train'))
            test_metrics.append(eval(test_dataset, 'test'))
            # Index 1 of an eval() record is the AUROC; `>=` means a tie
            # replaces the checkpoint with the most recent epoch.
            if test_metrics[-1][1] >= best_score:
                best_score = test_metrics[-1][1]
                best_metrics = test_metrics[-1]
                torch.save(model.state_dict(), './save/model/albge/{}.pt'.format(outfile_name))
        print('[*] Best result:')
        print(best_metrics)
        save_metrics(outfile_name, train_metrics, test_metrics)
    else:
        loadname = args.loadname
        # NOTE(review): relies on HeteroGNN providing a `load` method
        # (torch.nn.Module itself does not define one) - confirm in
        # utils.models.
        model.load(loadname)
        test_metrics.append(eval(test_dataset, 'test'))
        # NOTE(review): the summary prints below are assumed to belong to
        # this evaluation-only branch - confirm against the intended layout.
        print('[*] Eval result:')
        #print(test_metrics)
        print('[-] {:<3} | {:<3} | {:<3} | {:<3} |'.format('tp', 'tn', 'fp', 'fn'))
        print('[-] {:<3} | {:<3} | {:<3} | {:<3} |'.format(*test_metrics[0][0]))
        print('[-] {:<3} | {:<3} | {:<3} | {:<3} | {:<3} |'.format('auc', 'acc', 'rec', 'pre', 'f1'))
        print('[-] {:<3} | {:<3} | {:<3} | {:<3} | {:<3} |'.format(*test_metrics[0][1:6]))