Skip to content

Commit 9772048

Browse files
committed
Fixed benchmarking script.
1 parent 2bf761a commit 9772048

File tree

1 file changed

+19
-36
lines changed

1 file changed

+19
-36
lines changed

benchmarks/run_benchmark.py

+19 -36
Original file line number | Diff line number | Diff line change
@@ -33,19 +33,20 @@
3333
"FreeSolv": "FreeSolv",
3434
"Lipophilicity": "Lipophilicity",
3535
}
36-
dataset_paths = {
37-
"Photoswitch": "../data/property_prediction/Photoswitch.csv",
38-
"ESOL": "../data/property_prediction/ESOL.csv",
39-
"FreeSolv": "../data/property_prediction/FreeSolv.csv",
40-
"Lipophilicity": "../data/property_prediction/Lipophilicity.csv",
41-
}
36+
37+
featurisations = [
38+
"ecfp_fingerprints",
39+
"fragments",
40+
"ecfp_fragprints",
41+
"bag_of_smiles",
42+
"bag_of_selfies",
43+
]
4244

4345

4446
def main(
4547
n_trials,
4648
test_set_size,
4749
dataset_name,
48-
dataset_path,
4950
featurisation,
5051
gp_model,
5152
):
@@ -55,11 +56,7 @@ def main(
5556
n_trials: Number of random train/test splits for the datasets. Default is 20
5657
test_set_size: Size of the test set for evaluation. Default is 0.2
5758
dataset_name: Benchmark dataset to use. One of ['Photoswitch', 'ESOL', 'FreeSolv', 'Lipophilicity']
58-
dataset_path: Benchmark dataset path. One of ['../data/property_prediction/Photoswitch.csv',
59-
../data/property_prediction/ESOL.csv',
60-
'../data/property_prediction/FreeSolv.csv',
61-
'../data/property_prediction/Lipophilicity.csv']
62-
featurisation: Choice of features. One of ['fingerprints', 'fragments', 'fragprints', 'bag_of_smiles',
59+
featurisation: Choice of features. One of ['ecfp_fingerprints', 'fragments', 'ecfp_fragprints', 'bag_of_smiles',
6360
'bag_of_selfies']
6461
gp_model: Choice of model. One of ['Tanimoto', 'Scalar Product']
6562
@@ -72,16 +69,15 @@ def main(
7269
f"The specified dataset choice ({dataset_name}) is not a valid option. "
7370
f"Choose one of {list(dataset_names.keys())}."
7471
)
75-
if dataset_path not in dataset_paths.values():
76-
raise ValueError(f"The specified dataset path ({dataset_path}) is not a valid option. "
77-
f"Choose one of {list(dataset_paths.values())}.")
78-
if featurisation not in featurisations.values():
79-
raise ValueError(f"The specified featurisation ({featurisation}) is not a valid option. "
80-
f"Choose one of {list(featurisations.values())}.")
72+
if featurisation not in featurisations:
73+
raise ValueError(
74+
f"The specified featurisation ({featurisation}) is not a valid option. "
75+
f"Choose one of {featurisations}."
76+
)
8177

8278
# Load the benchmark dataset
8379
loader = MolPropLoader()
84-
loader.load_benchmark(dataset_name, dataset_path)
80+
loader.load_benchmark(dataset_name)
8581

8682
# Choose the featurisation
8783
loader.featurize(featurisation)
@@ -99,7 +95,6 @@ def main(
9995
qce_list = []
10096

10197
for i in range(0, n_trials):
102-
10398
print(f"Trial {i} of {n_trials}")
10499

105100
X_train, X_test, y_train, y_test = train_test_split(
@@ -157,7 +152,7 @@ def main(
157152
try:
158153
nlpd = negative_log_predictive_density(trained_pred_dist, y_test)
159154
except:
160-
Exception(f'NLPD calculation failed on trial {i}')
155+
Exception(f"NLPD calculation failed on trial {i}")
161156
continue
162157

163158
# Compute MSLL on Test set
@@ -254,7 +249,6 @@ def main(
254249

255250

256251
if __name__ == "__main__":
257-
258252
parser = argparse.ArgumentParser()
259253

260254
parser.add_argument(
@@ -278,23 +272,13 @@ def main(
278272
default="Lipophilicity",
279273
help="Dataset to use. One of [Photoswitch, ESOL, FreeSolv, Lipophilicity]",
280274
)
281-
parser.add_argument(
282-
"-p",
283-
"--path",
284-
type=str,
285-
default="../data/property_prediction/Lipophilicity.csv",
286-
help="Path to the dataset file. One of [../data/property_prediction/Photoswitch.csv, "
287-
"../data/property_prediction/ESOL.csv, "
288-
"../data/property_prediction/FreeSolv.csv, "
289-
"../data/property_prediction/Lipophilicity.csv]",
290-
)
291275
parser.add_argument(
292276
"-r",
293277
"--featurisation",
294278
type=str,
295-
default="fingerprints",
296-
help="str specifying the molecular featurisation. "
297-
"One of [fingerprints, fragments, fragprints].",
279+
default="ecfp_fingerprints",
280+
help="Choice of features. One of ['ecfp_fingerprints', 'fragments', "
281+
"'ecfp_fragprints', 'bag_of_smiles', 'bag_of_selfies']",
298282
)
299283
parser.add_argument(
300284
"-m",
@@ -309,7 +293,6 @@ def main(
309293
args.n_trials,
310294
args.test_set_size,
311295
args.dataset,
312-
args.path,
313296
args.featurisation,
314297
args.model,
315298
)

0 commit comments

Comments
 (0)