33
33
"FreeSolv" : "FreeSolv" ,
34
34
"Lipophilicity" : "Lipophilicity" ,
35
35
}
36
- dataset_paths = {
37
- "Photoswitch" : "../data/property_prediction/Photoswitch.csv" ,
38
- "ESOL" : "../data/property_prediction/ESOL.csv" ,
39
- "FreeSolv" : "../data/property_prediction/FreeSolv.csv" ,
40
- "Lipophilicity" : "../data/property_prediction/Lipophilicity.csv" ,
41
- }
36
+
37
+ featurisations = [
38
+ "ecfp_fingerprints" ,
39
+ "fragments" ,
40
+ "ecfp_fragprints" ,
41
+ "bag_of_smiles" ,
42
+ "bag_of_selfies" ,
43
+ ]
42
44
43
45
44
46
def main (
45
47
n_trials ,
46
48
test_set_size ,
47
49
dataset_name ,
48
- dataset_path ,
49
50
featurisation ,
50
51
gp_model ,
51
52
):
@@ -55,11 +56,7 @@ def main(
55
56
n_trials: Number of random train/test splits for the datasets. Default is 20
56
57
test_set_size: Size of the test set for evaluation. Default is 0.2
57
58
dataset_name: Benchmark dataset to use. One of ['Photoswitch', 'ESOL', 'FreeSolv', 'Lipophilicity']
58
- dataset_path: Benchmark dataset path. One of ['../data/property_prediction/Photoswitch.csv',
59
- ../data/property_prediction/ESOL.csv',
60
- '../data/property_prediction/FreeSolv.csv',
61
- '../data/property_prediction/Lipophilicity.csv']
62
- featurisation: Choice of features. One of ['fingerprints', 'fragments', 'fragprints', 'bag_of_smiles',
59
+ featurisation: Choice of features. One of ['ecfp_fingerprints', 'fragments', 'ecfp_fragprints', 'bag_of_smiles',
63
60
'bag_of_selfies']
64
61
gp_model: Choice of model. One of ['Tanimoto', 'Scalar Product']
65
62
@@ -72,16 +69,15 @@ def main(
72
69
f"The specified dataset choice ({ dataset_name } ) is not a valid option. "
73
70
f"Choose one of { list (dataset_names .keys ())} ."
74
71
)
75
- if dataset_path not in dataset_paths .values ():
76
- raise ValueError (f"The specified dataset path ({ dataset_path } ) is not a valid option. "
77
- f"Choose one of { list (dataset_paths .values ())} ." )
78
- if featurisation not in featurisations .values ():
79
- raise ValueError (f"The specified featurisation ({ featurisation } ) is not a valid option. "
80
- f"Choose one of { list (featurisations .values ())} ." )
72
+ if featurisation not in featurisations :
73
+ raise ValueError (
74
+ f"The specified featurisation ({ featurisation } ) is not a valid option. "
75
+ f"Choose one of { featurisations } ."
76
+ )
81
77
82
78
# Load the benchmark dataset
83
79
loader = MolPropLoader ()
84
- loader .load_benchmark (dataset_name , dataset_path )
80
+ loader .load_benchmark (dataset_name )
85
81
86
82
# Choose the featurisation
87
83
loader .featurize (featurisation )
@@ -99,7 +95,6 @@ def main(
99
95
qce_list = []
100
96
101
97
for i in range (0 , n_trials ):
102
-
103
98
print (f"Trial { i } of { n_trials } " )
104
99
105
100
X_train , X_test , y_train , y_test = train_test_split (
@@ -157,7 +152,7 @@ def main(
157
152
try :
158
153
nlpd = negative_log_predictive_density (trained_pred_dist , y_test )
159
154
except :
160
- Exception (f' NLPD calculation failed on trial { i } ' )
155
+ Exception (f" NLPD calculation failed on trial { i } " )
161
156
continue
162
157
163
158
# Compute MSLL on Test set
@@ -254,7 +249,6 @@ def main(
254
249
255
250
256
251
if __name__ == "__main__" :
257
-
258
252
parser = argparse .ArgumentParser ()
259
253
260
254
parser .add_argument (
@@ -278,23 +272,13 @@ def main(
278
272
default = "Lipophilicity" ,
279
273
help = "Dataset to use. One of [Photoswitch, ESOL, FreeSolv, Lipophilicity]" ,
280
274
)
281
- parser .add_argument (
282
- "-p" ,
283
- "--path" ,
284
- type = str ,
285
- default = "../data/property_prediction/Lipophilicity.csv" ,
286
- help = "Path to the dataset file. One of [../data/property_prediction/Photoswitch.csv, "
287
- "../data/property_prediction/ESOL.csv, "
288
- "../data/property_prediction/FreeSolv.csv, "
289
- "../data/property_prediction/Lipophilicity.csv]" ,
290
- )
291
275
parser .add_argument (
292
276
"-r" ,
293
277
"--featurisation" ,
294
278
type = str ,
295
- default = "fingerprints " ,
296
- help = "str specifying the molecular featurisation. "
297
- "One of [fingerprints, fragments, fragprints]. " ,
279
+ default = "ecfp_fingerprints " ,
280
+ help = "Choice of features. One of ['ecfp_fingerprints', 'fragments', "
281
+ "'ecfp_fragprints', 'bag_of_smiles', 'bag_of_selfies'] " ,
298
282
)
299
283
parser .add_argument (
300
284
"-m" ,
@@ -309,7 +293,6 @@ def main(
309
293
args .n_trials ,
310
294
args .test_set_size ,
311
295
args .dataset ,
312
- args .path ,
313
296
args .featurisation ,
314
297
args .model ,
315
298
)
0 commit comments