Skip to content

Commit

Permalink
Polynomial fitting for total number of atoms 10
Browse files Browse the repository at this point in the history
  • Loading branch information
diwadd committed Jan 13, 2018
1 parent e30f233 commit d39e543
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 13 deletions.
Empty file modified general_structure_features.py
100644 → 100755
Empty file.
20 changes: 13 additions & 7 deletions geometry_xyz.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,13 @@ def ewald_matrix_features(data,
logger.info("total_energy_matrix trace: " + str(ewald_sum_data[i][6]))

ewald_sum_data = np.hstack((ids, ewald_sum_data))

if noa != -1:
ewald_sum_real_energy_matrix = np.hstack((ids, ewald_sum_real_energy_matrix))
ewald_sum_reciprocal_energy_matrix = np.hstack((ids, ewald_sum_reciprocal_energy_matrix))
ewald_sum_total_energy_matrix = np.hstack((ids, ewald_sum_total_energy_matrix))
ewald_sum_point_energy_matrix = np.hstack((ids, ewald_sum_point_energy_matrix))

np.savetxt(file_name_type + "_ewald_sum_data.csv", ewald_sum_data, delimiter=",")
np.save(file_name_type + "_ewald_sum_data.npy", ewald_sum_data)

Expand Down Expand Up @@ -1133,12 +1140,11 @@ def ewald_matrix_features(data,
assert np.array_equal(train_total_number_of_atoms, test_total_number_of_atoms), assert_error_text


# scan_through_geometry_files_and_extrac_features(train_data, data_type="train", file_name_type="train_")
# ewald_matrix_features(train_data, -1, data_type="train", file_name_type="train_")
#
# scan_through_geometry_files_and_extrac_features(test_data, data_type="test", file_name_type="test_")
# ewald_matrix_features(test_data, -1, data_type="test", file_name_type="test_")
#scan_through_geometry_files_and_extrac_features(train_data, data_type="train", file_name_type="train_")
ewald_matrix_features(train_data, -1, data_type="train", file_name_type="train_")

scan_through_geometry_files_and_extrac_features(test_data, data_type="test", file_name_type="test_")
ewald_matrix_features(test_data, -1, data_type="test", file_name_type="test_")

for i in range(len(train_total_number_of_atoms)):

Expand All @@ -1153,7 +1159,7 @@ def ewald_matrix_features(data,
logger.info("number of atoms {0}; data.shape: {1}".format(noa, conditioned_data.shape))

# hist_data(data[:, -1], text=str(noa))
file_name_type = "train_" + str(noa) + "_"
file_name_type = "train_" + str(noa)
local_data_type = "train"
scan_through_geometry_files_and_extrac_features(conditioned_data,
data_type=local_data_type,
Expand All @@ -1172,7 +1178,7 @@ def ewald_matrix_features(data,
conditioned_data = test_data[ condition ]
logger.info("number of atoms {0}; data.shape: {1}".format(noa, conditioned_data.shape))

file_name_type = "test_" + str(noa) + "_"
file_name_type = "test_" + str(noa)
local_data_type = "test"
scan_through_geometry_files_and_extrac_features(conditioned_data,
data_type=data_type,
Expand Down
4 changes: 3 additions & 1 deletion global_flags_constanst.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
LABELS["bandgap_energy_ev"] = 13


LOGGING_LEVEL = logging.INFO
ID = 0
NUMBER_OF_TOTAL_ATOMS = 2


LOGGING_LEVEL = logging.INFO
SPACE_GROUP_PROPERTIES = {12: 4,
33: 4,
167: 12,
Expand Down
Empty file modified graph_preformance.py
100644 → 100755
Empty file.
Empty file modified main.py
100644 → 100755
Empty file.
13 changes: 11 additions & 2 deletions models.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,17 @@ def evaluate(self, x, y_true):
y_pred = self.predict(x)
y_true = y_true.reshape((-1, 1))

logger.info("Five example predictions:")
for i in range(5):
logger.info("Example predictions:")

if n == 1:
# number of example to print
noetp = 1
elif n > 5:
noetp = 5
else:
noetp = 0

for i in range(noetp):
logger.info("y_pred: {0}; y_true: {1}".format(y_pred[i], y_true[i]))

rmsle = sf.root_mean_squared_logarithmic_error(y_true, y_pred)
Expand Down
Empty file modified non_geometry_features.py
100644 → 100755
Empty file.
4 changes: 2 additions & 2 deletions plotting_features.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

#plt.scatter(features[:, 5], data[:, -1])
#plt.hist2d(data[:, -1], features[:, 1], bins=60)
index = 9
target = -2
index = 7
target = -1
bg_index = 14

plt.scatter(custom_data[custom_data[:, bg_index] == 10, index], custom_data[custom_data[:, bg_index] == 10, target], label="10")
Expand Down
Empty file modified structure_visualization.sage
100644 → 100755
Empty file.
Empty file modified support_classes.py
100644 → 100755
Empty file.
71 changes: 70 additions & 1 deletion support_functions.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,20 @@ def cross_validate(x,
model_class,
model_parameters=None,
fraction=0.1):
"""
Perform normal cross validation.
A fraction of the total data is used as
the test set. If, e.g., fraction=0.1 ten
cross validation rounds will be performed.
:param x:
:param y:
:param model_class:
:param model_parameters:
:param fraction:
:return:
"""


logger.debug("Cross validating data.")

Expand Down Expand Up @@ -150,14 +164,15 @@ def cross_validate(x,
model_parameters["validation_data"] = (valid_data, valid_targets)
model = model_class(**model_parameters)


_, train_m = train_targets.shape
if train_m == 1:
model.fit(train_data, train_targets.ravel())
else:
model.fit(train_data, train_targets)

custom_data = np.hstack((valid_data, valid_targets))
condition = custom_data[:, gfc.LABELS["number_of_total_atoms"] - 1] == 80
condition = custom_data[:, gfc.LABELS["number_of_total_atoms"] - 1] == 10
custom_data = custom_data[condition]
custom_valid_data = custom_data[:, 0:-1]
custom_targets_data = custom_data[:, -1].reshape(-1, 1)
Expand All @@ -179,8 +194,62 @@ def cross_validate(x,

logger.info("train_avg: {0}, valid_avg: {1}".format(train_avg, valid_avg))

# This printout is parsed by graph_preformance.py to grab the averaged
# train/validation scores. Print is simpler than logging.
print(str(train_avg) + "x" + str(valid_avg), end="")


def one_left_cross_validation(x,
                              y,
                              model_class,
                              model_parameters=None,
                              fraction=0.1):
    """
    Perform leave-one-out cross validation.
    Every example is used once as the validation
    set while the model is fitted on all the
    remaining examples. The train and validation
    scores returned by model.evaluate are averaged
    over all rounds and logged.
    :param x: 2D array of features, one example per row.
    :param y: 2D array of targets, one example per row.
    :param model_class: Class that is instantiated once per round
                        with the keyword arguments from
                        model_parameters plus "validation_data".
                        The instance must provide fit and evaluate.
    :param model_parameters: Keyword arguments for model_class.
                             None means no extra arguments.
                             The passed dict is not modified.
    :param fraction: Unused. Kept so that the signature
                     matches cross_validate.
    :return: Tuple (train_avg, valid_avg).
    :raises ValueError: If x contains no examples.
    """

    logger.info("One left cross validation...")
    n, _ = x.shape

    if n == 0:
        # Without this guard the averaging below would fail
        # with an uninformative ZeroDivisionError.
        raise ValueError("one_left_cross_validation needs at least one example.")

    # Copy the parameters so the dict of the caller is never
    # modified, and so that the default None is usable.
    base_parameters = {} if model_parameters is None else dict(model_parameters)

    train_avg = 0.0
    valid_avg = 0.0
    for i in range(n):

        train_data = np.delete(x, [i], axis=0)
        train_targets = np.delete(y, [i], axis=0)

        logger.info("train_data.shape: {0}".format(train_data.shape))
        logger.info("train_targets.shape: {0}".format(train_targets.shape))

        # valid_x is a single example so its shape
        # should be (1, n_features). The same holds for
        # valid_y: (1, n_targets). Reshaping with (-1, 1)
        # would transpose multi-target rows.
        valid_x = x[i, :].reshape(1, -1)
        valid_y = y[i, :].reshape(1, -1)

        logger.info("test_x.shape: {0}".format(valid_x.shape))
        logger.info("test_y.shape: {0}".format(valid_y.shape))

        round_parameters = dict(base_parameters)
        round_parameters["validation_data"] = (valid_x, valid_y)
        model = model_class(**round_parameters)

        # Single-target columns are flattened before fitting.
        _, train_m = train_targets.shape
        if train_m == 1:
            model.fit(train_data, train_targets.ravel())
        else:
            model.fit(train_data, train_targets)

        rmsle_train = model.evaluate(train_data, train_targets)
        rmsle_valid = model.evaluate(valid_x, valid_y)

        logger.info("i: {0}, rmsle_train: {1:.9f}, rmsle_valid: {2:.9f}".format(i, rmsle_train, rmsle_valid))

        train_avg = train_avg + rmsle_train
        valid_avg = valid_avg + rmsle_valid

    train_avg = train_avg/n
    valid_avg = valid_avg/n

    logger.info("train_avg: {0}, valid_avg: {1}".format(train_avg, valid_avg))

    return train_avg, valid_avg



def get_percentage_of_o_atoms(percent_atom_al,
percent_atom_ga,
percent_atom_in):
Expand Down

0 comments on commit d39e543

Please sign in to comment.