# predict_onlyassays.py
import importlib
import pickle
import sys

import numpy as np
import torch
from torch.utils.data import DataLoader

device = 'cpu'
# ****************************************************************************
# ****************** some pre-defined lists of features **********************
# ****************************************************************************
# IMPORTANT: THESE HAVE TO BE SET MANUALLY
features_to_be_scaled = TO_BE_SET  # column names that were standardized during training
global_features = TO_BE_SET        # column names of all global features fed to the models
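# A purely illustrative example of how these lists might look (hypothetical
# column names -- the real ones depend on the training data):
#   features_to_be_scaled = ["assay_A", "assay_B", "assay_C"]
#   global_features = ["assay_A", "assay_B", "assay_C", "assay_D"]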
# ***********************************************************************************
def get_data(global_features, labels):
    """Pair each row of global features (as a tensor) with its integer label."""
    bPKs = []
    global_feat_list = []
    for i, label in enumerate(labels):
        bPKs.append(int(label))
        global_feat_list.append(torch.from_numpy(global_features[i, :]))
    return global_feat_list, bPKs
# ***********************************************************************************
def collate(sample):
    """Stack a list of (features, label) pairs into batched tensors for the DataLoader."""
    global_feats, labels = map(list, zip(*sample))
    global_feats = torch.stack(global_feats)  # entries are already tensors
    return global_feats, torch.tensor(labels)
# **********************************************************************************
def scale_features(df):
    """Apply the StandardScaler that was fitted on the training set."""
    from sklearn.preprocessing import StandardScaler  # noqa: F401 -- documents the sklearn dependency needed for unpickling
    scalerfile = "scaler_onlyassays.sav"
    with open(scalerfile, 'rb') as f:
        final_train_scaler = pickle.load(f)
    df[features_to_be_scaled] = final_train_scaler.transform(df[features_to_be_scaled])
    return df
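# For reference, a minimal sketch of how a scaler file such as
# "scaler_onlyassays.sav" could have been produced at training time
# (hypothetical -- the actual training code is not part of this script):
#
#     from sklearn.preprocessing import StandardScaler
#     scaler = StandardScaler().fit(train_df[features_to_be_scaled])
#     with open("scaler_onlyassays.sav", "wb") as f:
#         pickle.dump(scaler, f)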
# ***********************************************************************************
def predict(df, model_path, scale=True, out_rank=1):
    """Average the class probabilities predicted by the model ensemble.

    out_rank selects which output class probability to report; out_rank=-1
    reports the sum over the first four class probabilities instead.
    """
    df = df.copy()
    if scale:
        print("Scaling the features ...")
        df = scale_features(df)
    # Import the model class from the directory that also holds the weights.
    sys.path.insert(1, model_path)
    mod = importlib.import_module('Predictor_onlyGlobalFeats_wdropout')
    importlib.reload(mod)
    Predictor_onlyGlobalFeats_wdropout = getattr(mod, 'Predictor_onlyGlobalFeats_wdropout')
    num_models = 10
    model_list = []
    print("Read the models from " + str(model_path))
    for i in range(num_models):
        tmp_model = Predictor_onlyGlobalFeats_wdropout(global_feats=len(global_features), num_layers=7,
                                                       n_tasks=5, predictor_hidden_feats=256)
        tmp_model.load_state_dict(torch.load(model_path + "/weights_" + str(i) + ".pth",
                                             map_location=torch.device('cpu')))
        tmp_model.eval()
        model_list.append(tmp_model)
    print("Prepare the data ...")
    # Labels are unknown at prediction time, so dummy labels of 1 are used.
    global_feat_list, bPK = get_data(df[global_features].to_numpy(dtype=np.float32),
                                     [1] * df.shape[0])
    test_data = list(zip(global_feat_list, bPK))
    test_loader = DataLoader(test_data, batch_size=256, shuffle=False, collate_fn=collate, drop_last=False)
    print("Make predictions ...")
    preds = []
    model_list_device = [tmp_model.to(device) for tmp_model in model_list]
    with torch.no_grad():
        for global_feats, labels in test_loader:
            labels = labels.to(device)
            global_feats = global_feats.to(device)
            probs = np.zeros(len(labels))
            for tmp_model in model_list_device:
                logits, probas = tmp_model(global_feats)
                tmp_probs = probas.to('cpu').numpy()
                if out_rank == -1:
                    # sum the predicted probabilities of the first four classes
                    probs = probs + (1.0 / num_models) * (tmp_probs[:, 0] + tmp_probs[:, 1]
                                                          + tmp_probs[:, 2] + tmp_probs[:, 3])
                else:
                    probs = probs + (1.0 / num_models) * tmp_probs[:, out_rank]
            preds = preds + list(probs)
    return preds
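# Minimal usage sketch (hypothetical file and directory names -- adjust to
# your own data; features_to_be_scaled and global_features must be set first):
#
#     import pandas as pd
#     df = pd.read_csv("molecules_with_assays.csv")
#     probs = predict(df, model_path="./trained_models", scale=True, out_rank=1)
#     df["pred_prob"] = probs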