Polynomial fitting for total number of atoms 10

diwadd · Jan 13, 2018 · d39e543 · d39e543
1 parent e30f233
commit d39e543
Show file tree

Hide file tree

Showing 11 changed files with 99 additions and 13 deletions.
diff --git a/general_structure_features.py b/general_structure_features.py
diff --git a/geometry_xyz.py b/geometry_xyz.py
@@ -1081,6 +1081,13 @@ def ewald_matrix_features(data,
         logger.info("total_energy_matrix trace: " + str(ewald_sum_data[i][6]))
 
     ewald_sum_data = np.hstack((ids, ewald_sum_data))
+
+    if noa != -1:
+        ewald_sum_real_energy_matrix = np.hstack((ids, ewald_sum_real_energy_matrix))
+        ewald_sum_reciprocal_energy_matrix = np.hstack((ids, ewald_sum_reciprocal_energy_matrix))
+        ewald_sum_total_energy_matrix = np.hstack((ids, ewald_sum_total_energy_matrix))
+        ewald_sum_point_energy_matrix = np.hstack((ids, ewald_sum_point_energy_matrix))
+
     np.savetxt(file_name_type + "_ewald_sum_data.csv", ewald_sum_data, delimiter=",")
     np.save(file_name_type + "_ewald_sum_data.npy", ewald_sum_data)
 
@@ -1133,12 +1140,11 @@ def ewald_matrix_features(data,
     assert np.array_equal(train_total_number_of_atoms, test_total_number_of_atoms), assert_error_text
 
 
-    # scan_through_geometry_files_and_extrac_features(train_data, data_type="train", file_name_type="train_")
-    # ewald_matrix_features(train_data, -1, data_type="train", file_name_type="train_")
-    #
-    # scan_through_geometry_files_and_extrac_features(test_data, data_type="test", file_name_type="test_")
-    # ewald_matrix_features(test_data, -1, data_type="test", file_name_type="test_")
+    #scan_through_geometry_files_and_extrac_features(train_data, data_type="train", file_name_type="train_")
+    ewald_matrix_features(train_data, -1, data_type="train", file_name_type="train_")
 
+    scan_through_geometry_files_and_extrac_features(test_data, data_type="test", file_name_type="test_")
+    ewald_matrix_features(test_data, -1, data_type="test", file_name_type="test_")
 
     for i in range(len(train_total_number_of_atoms)):
 
@@ -1153,7 +1159,7 @@ def ewald_matrix_features(data,
         logger.info("number of atoms {0}; data.shape: {1}".format(noa, conditioned_data.shape))
 
         # hist_data(data[:, -1], text=str(noa))
-        file_name_type = "train_" + str(noa) + "_"
+        file_name_type = "train_" + str(noa)
         local_data_type = "train"
         scan_through_geometry_files_and_extrac_features(conditioned_data,
                                                         data_type=local_data_type,
@@ -1172,7 +1178,7 @@ def ewald_matrix_features(data,
         conditioned_data = test_data[ condition ]
         logger.info("number of atoms {0}; data.shape: {1}".format(noa, conditioned_data.shape))
 
-        file_name_type = "test_" + str(noa) + "_"
+        file_name_type = "test_" + str(noa)
         local_data_type = "test"
         scan_through_geometry_files_and_extrac_features(conditioned_data,
                                                         data_type=data_type,

diff --git a/global_flags_constanst.py b/global_flags_constanst.py
@@ -18,9 +18,11 @@
 LABELS["bandgap_energy_ev"] = 13
 
 
-LOGGING_LEVEL = logging.INFO
+ID = 0
+NUMBER_OF_TOTAL_ATOMS = 2
 
 
+LOGGING_LEVEL = logging.INFO
 SPACE_GROUP_PROPERTIES = {12: 4,
                           33: 4,
                           167: 12,

diff --git a/graph_preformance.py b/graph_preformance.py
diff --git a/main.py b/main.py
diff --git a/models.py b/models.py
@@ -84,8 +84,17 @@ def evaluate(self, x, y_true):
             y_pred = self.predict(x)
             y_true = y_true.reshape((-1, 1))
 
-            logger.info("Five example predictions:")
-            for i in range(5):
+            logger.info("Example predictions:")
+
+            if n == 1:
+                # number of examples to print
+                noetp = 1
+            elif n > 5:
+                noetp = 5
+            else:
+                noetp = 0
+
+            for i in range(noetp):
                 logger.info("y_pred: {0}; y_true: {1}".format(y_pred[i], y_true[i]))
 
             rmsle = sf.root_mean_squared_logarithmic_error(y_true, y_pred)

diff --git a/non_geometry_features.py b/non_geometry_features.py
diff --git a/plotting_features.py b/plotting_features.py
@@ -16,8 +16,8 @@
 
 #plt.scatter(features[:, 5], data[:, -1])
 #plt.hist2d(data[:, -1], features[:, 1], bins=60)
-index = 9
-target = -2
+index = 7
+target = -1
 bg_index = 14
 
 plt.scatter(custom_data[custom_data[:, bg_index] == 10, index], custom_data[custom_data[:, bg_index] == 10, target], label="10")

diff --git a/structure_visualization.sage b/structure_visualization.sage
diff --git a/support_classes.py b/support_classes.py
diff --git a/support_functions.py b/support_functions.py
@@ -114,6 +114,20 @@ def cross_validate(x,
                    model_class,
                    model_parameters=None,
                    fraction=0.1):
+    """
+    Perform normal cross validation.
+    A fraction of the total data is used as
+    the test set. If, e.g., fraction=0.1 ten
+    cross validation rounds will be performed.
+
+    :param x:
+    :param y:
+    :param model_class:
+    :param model_parameters:
+    :param fraction:
+    :return:
+    """
+
 
     logger.debug("Cross validating data.")
 
@@ -150,14 +164,15 @@ def cross_validate(x,
         model_parameters["validation_data"] = (valid_data, valid_targets)
         model = model_class(**model_parameters)
 
+
         _, train_m = train_targets.shape
         if train_m == 1:
             model.fit(train_data, train_targets.ravel())
         else:
             model.fit(train_data, train_targets)
 
         custom_data = np.hstack((valid_data, valid_targets))
-        condition = custom_data[:, gfc.LABELS["number_of_total_atoms"] - 1] == 80
+        condition = custom_data[:, gfc.LABELS["number_of_total_atoms"] - 1] == 10
         custom_data = custom_data[condition]
         custom_valid_data = custom_data[:, 0:-1]
         custom_targets_data = custom_data[:, -1].reshape(-1, 1)
@@ -179,8 +194,62 @@ def cross_validate(x,
 
     logger.info("train_avg: {0}, valid_avg: {1}".format(train_avg, valid_avg))
 
+    # This printout is used by graph_preformance.py to grab the
+    # averaged train/validation results. Print is simpler than logging.
     print(str(train_avg) + "x" + str(valid_avg), end="")
 
+
+def one_left_cross_validation(x,
+                              y,
+                              model_class,
+                              model_parameters=None,
+                              fraction=0.1):
+    """
+    Perform leave-one-out cross validation.
+    Each row of x is held out once as the single
+    validation example while a fresh model is fitted
+    on the remaining n - 1 rows. The values returned
+    by model.evaluate for the training rows and for the
+    held-out row are averaged over the n rounds and logged.
+
+    :param x: 2D array of features, one example per row.
+    :param y: 2D array of targets with one row per row of x.
+    :param model_class: Class instantiated as model_class(**model_parameters);
+                        its instances must provide fit and evaluate.
+    :param model_parameters: Keyword arguments for model_class. None is
+                             treated as an empty dict. The dict passed in
+                             is copied and never modified.
+    :param fraction: Unused here; presumably kept so the signature
+                     matches cross_validate.
+    :return: None
+    """
+
+    logger.info("One left cross validation...")
+    n, _ = x.shape
+
+    # Work on a private copy. With the default of None the item
+    # assignment inside the loop used to raise a TypeError, and
+    # writing into the caller's dict changed it as a side effect.
+    if model_parameters is None:
+        model_parameters = {}
+    else:
+        model_parameters = dict(model_parameters)
+
+    train_avg = 0.0
+    valid_avg = 0.0
+    for i in range(n):
+
+        # Training set: everything except row i.
+        train_data = np.delete(x, [i], axis=0)
+        train_targets = np.delete(y, [i], axis=0)
+
+        logger.info("train_data.shape: {0}".format(train_data.shape))
+        logger.info("train_targets.shape: {0}".format(train_targets.shape))
+
+        # valid_x is a single example so its shape
+        # should be (1, n_features)
+        valid_x = x[i, :].reshape(1, -1)
+        valid_y = y[i, :].reshape(-1, 1)
+
+        logger.info("test_x.shape: {0}".format(valid_x.shape))
+        logger.info("test_y.shape: {0}".format(valid_y.shape))
+
+        model_parameters["validation_data"] = (valid_x, valid_y)
+        model = model_class(**model_parameters)
+
+        # Single-column targets are flattened to 1D before fitting.
+        _, train_m = train_targets.shape
+        if train_m == 1:
+            model.fit(train_data, train_targets.ravel())
+        else:
+            model.fit(train_data, train_targets)
+
+        rmsle_train = model.evaluate(train_data, train_targets)
+        rmsle_valid = model.evaluate(valid_x, valid_y)
+
+        logger.info("i: {0}, rmsle_train: {1:.9f}, rmsle_valid: {2:.9f}".format(i, rmsle_train, rmsle_valid))
+
+        train_avg = train_avg + rmsle_train
+        valid_avg = valid_avg + rmsle_valid
+
+    train_avg = train_avg/n
+    valid_avg = valid_avg/n
+
+    logger.info("train_avg: {0}, valid_avg: {1}".format(train_avg, valid_avg))
+
+
+
 def get_percentage_of_o_atoms(percent_atom_al,
                               percent_atom_ga,
                               percent_atom_in):