From a04d2ea728b17435bcde45ccc281df368dafaa4a Mon Sep 17 00:00:00 2001
From: Osman Mamun <mamun.che06@gmail.com>
Date: Wed, 27 Mar 2024 14:40:33 -0500
Subject: [PATCH] updates

---
 experiments/test_1/script.py |  1 -
 src/mobo_qm9.py              | 80 +++++++++++++++++++++++-------------
 2 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/experiments/test_1/script.py b/experiments/test_1/script.py
index f6dc910..0af9f0f 100644
--- a/experiments/test_1/script.py
+++ b/experiments/test_1/script.py
@@ -9,7 +9,6 @@
 params = MOBOQM9Parameters(featurizer="CM",
                            kernel="RBF",
                            surrogate_model="GaussianProcess",
-                           acq_func="qEHVI",
                            targets=["gap", "mu"],
                            target_bools=[True, True],
                            num_total_points=100,
diff --git a/src/mobo_qm9.py b/src/mobo_qm9.py
index 8407a19..ce2c088 100644
--- a/src/mobo_qm9.py
+++ b/src/mobo_qm9.py
@@ -9,6 +9,7 @@
 from botorch.fit import fit_gpytorch_mll
 from botorch.models.transforms.input import Normalize
 from botorch.models.transforms.outcome import Standardize
+import pandas as pd
 
 from .data.cm_featurizer import get_coulomb_matrix
 from .acquisition_functions import optimize_qEHVI, optimize_qNEHVI
@@ -23,7 +24,6 @@ class MOBOQM9Parameters(NamedTuple):
         featurizer: Featurizer to use.
         kernel: Kernel to use.
         surrogate_model: Surrogate model to use.
-        acq_func: Acquisition function to use.
         targets: List of targets to optimize.
         target_bools: List of booleans indicating wheather to minimize or 
             maximize each target.
@@ -35,7 +35,6 @@ class MOBOQM9Parameters(NamedTuple):
     featurizer: Literal["ECFP", "CM", "ACSF"]
     kernel: Literal["RBF", "Matern"]
     surrogate_model: Literal["GaussianProcess", "RandomForest"]
-    acq_func: Literal["qEHVI", "qNEHVI", "random"]
     targets: List[str]
     target_bools: List[bool]
     num_candidates: int = 1
@@ -60,8 +59,24 @@ def __init__(self, params: MOBOQM9Parameters):
             self.params.num_total_points)
         self.features, self.targets = self.get_features_and_targets()
         self.train_indices = self.get_train_indices()
-        self.dataframe = None 
+        self.dataframe = pd.DataFrame.from_dict(self.from_target_dict())
+        self.acq_met = {"qEHVI": False, "qNEHVI": False, "random": False}
 
+    def form_target_dict(self):
+        """
+        Forms the target dictionary for the MOBOQM9 model.
+        
+        returns:
+            target_dict: Target dictionary for the MOBOQM9 model.
+        """
+        target_dict = {"iteration": None}
+        for i, target in enumerate(self.params.targets):
+            target_dict[target] = self.targets[:, i]
+            target_dict["target_qEHVI"] = None
+            target_dict["target_qNEHVI"] = None
+            target_dict["target_random"] = None
+        return target_dict
+    
     def get_features_and_targets(self):
         """
         Gets the features and targets for the MOBOQM9 model.
@@ -77,16 +92,19 @@ def get_features_and_targets(self):
         else:
             raise NotImplementedError
     
-    def get_surrogate_model(self):
+    def get_surrogate_model(self, acq):
         """
         Gets the surrogate model for the MOBOQM9 model.
             
+        args:
+            acq: Acquisition function to use.
+        
         returns:
             model: Surrogate model for the MOBOQM9 model.
         """
-        features = torch.tensor(self.features[self.train_indices],
+        features = torch.tensor(self.features[self.train_indices["acq"]],
                                 dtype=torch.double)
-        targets = torch.tensor(self.correct_sign(self.targets[self.train_indices]),
+        targets = torch.tensor(self.correct_sign(self.targets[self.train_indices["acq"]]),
                                 dtype=torch.double)
         var = torch.full_like(targets, 1e-6)
 
@@ -120,28 +138,29 @@ def correct_sign(self, Y):
                 y_copy[:, idx] *= -1
         return y_copy
     
-    def optimize_acquisition_function(self, model):
+    def optimize_acquisition_function(self, model, acq):
         """
         Optimizes the acquisition function for the MOBOQM9 model.
         
         args:
             model: Surrogate model for the MOBOQM9 model.
+            acq: Acquisition function to use.
         
         returns:
             candidates: Candidates for the MOBOQM9 model.
         """
-        y_train = self.correct_sign(self.targets[self.train_indices])
+        y_train = self.correct_sign(self.targets[self.train_indices["acq"]])
         y_train = torch.tensor(y_train, dtype=torch.double)
-        x_train = torch.tensor(self.features[self.train_indices], dtype=torch.double)
-        x_test = torch.tensor(self.features[~self.train_indices], dtype=torch.double)
+        x_train = torch.tensor(self.features[self.train_indices["acq"]], dtype=torch.double)
+        x_test = torch.tensor(self.features[~self.train_indices["acq"]], dtype=torch.double)
         reference = y_train.mean(0)[0]
-        if self.params.acq_func == "qEHVI":
+        if acq == "qEHVI":
             return optimize_qEHVI(model=model,
                                   reference=reference,
                                   y_train=y_train,
                                   x_test=x_test,
                                   n_candidates=self.params.num_candidates)
-        elif self.params.acq_func == "qNEHVI":
+        elif acq == "qNEHVI":
             return optimize_qNEHVI(model=model,
                                    reference=reference,
                                    x_train=x_train,
@@ -156,16 +175,19 @@ def run_optimization(self):
         """
         for iter in range(self.params.n_iters):
             logger.info(f"MOBOQM9 iteration {iter + 1} of {self.params.n_iters}.")
-            model = self.get_surrogate_model()
-            if self.params.acq_func == "random":
-                for _ in range(self.params.num_candidates):
-                    idx = np.random.choice(np.where(~self.train_indices)[0])
-                    self.train_indices[idx] = True
-            else:
-                candidates = self.optimize_acquisition_function(model)
-                self.update_train_indices(candidates)
-            if self.stopping_criteria_met():
-                break
+            for acq in ["qEHVI", "qNEHVI", "random"]:
+                if self.acq_met[acq]:
+                    continue
+                model = self.get_surrogate_model(acq)
+                if acq == "random":
+                    for _ in range(self.params.num_candidates):
+                        idx = np.random.choice(np.where(~self.train_indices)[0])
+                        self.train_indices[acq][idx] = True
+                else:
+                    candidates = self.optimize_acquisition_function(model)
+                    self.update_train_indices(candidates, acq)
+                self.stopping_criteria_met(acq)
+                
         logger.info("MOBOQM9 optimization finished.")
     
     def get_train_indices(self):
@@ -180,9 +202,9 @@ def get_train_indices(self):
             self.params.num_seed_points)
         mask = np.zeros(len(self.total_indices), dtype=bool)
         mask[temp_indices] = True
-        return mask
+        return {"qEHVI": mask, "qNEHVI": mask, "random": mask}
     
-    def stopping_criteria_met(self):
+    def stopping_criteria_met(self, acq):
         """
         Checks if the MOBOQM9 optimization has met the stopping criteria.
         
@@ -190,7 +212,7 @@ def stopping_criteria_met(self):
             bool: True if the MOBOQM9 optimization has met the stopping criteria.
         """
         y_global = torch.tensor(self.targets)
-        y_current = torch.tensor(self.targets[self.train_indices])
+        y_current = torch.tensor(self.targets[self.train_indices[acq]])
         ref_points = y_global.min(0)[0]
         bd_global = DominatedPartitioning(
             ref_point=ref_points,
@@ -202,20 +224,21 @@ def stopping_criteria_met(self):
             Y=y_current,
         )
         volume_current = bd_current.compute_hypervolume().item()
-        return volume_global == volume_current
+        self.acq_met[acq] = (volume_global == volume_current)
         
     
-    def update_train_indices(self, candidates):
+    def update_train_indices(self, candidates, acq):
         """
         Updates the train indices for the MOBOQM9 model.
         
         args:
             candidates: Candidates for the MOBOQM9 model.
+            acq: Acquisition function to use.
         """
         for cand in candidates:
             for idx, feat in enumerate(self.features):
                 if np.allclose(feat, cand):
-                    self.train_indices[idx] = True
+                    self.train_indices[acq][idx] = True
     
     def validate_params(self):
         """
@@ -227,7 +250,6 @@ def validate_params(self):
         assert self.params.featurizer in ["ECFP", "CM", "ACSF"], "Featurizer must be one of ECFP, CM, or ACSF."
         assert self.params.kernel in ["RBF", "Matern", "Tanimoto"], "Kernel must be one of RBF, Matern."
         assert self.params.surrogate_model in ["GaussianProcess", "RandomForest"], "Surrogate model must be one of GaussianProcess, or RandomForest."
-        assert self.params.acq_func in ["qEHVI", "qNEHVI", "random"], "Acquisition function must be one of qEHVI, or qNEHVI, or random."
         assert len(self.params.targets) == len(self.params.target_bools), "Number of targets must equal number of target booleans."
         assert self.params.num_total_points > 0, "Number of total points must be greater than zero."
         assert self.params.num_seed_points > 0, "Number of seed points must be greater than zero."