From 93bf64250306937b1fece9c339ee115d492b9428 Mon Sep 17 00:00:00 2001 From: Runhai Ouyang Date: Sat, 24 Aug 2024 12:20:40 +0800 Subject: [PATCH] Add files via upload --- utilities/Ionic_Radii | 1212 ++++++++++++++++++++++++++++ utilities/Readme | 40 + utilities/SISSO_predict.f90 | 312 +++++++ utilities/SISSO_predict_para | 4 + utilities/SVC.py | 108 +++ utilities/VarSelect_SISSO.py | 545 +++++++++++++ utilities/af2traindat.f90 | 131 +++ utilities/atom_features | 103 +++ utilities/k-fold-cv.f90 | 142 ++++ utilities/leave-percent-out-cv.f90 | 134 +++ utilities/samplelist | 26 + 11 files changed, 2757 insertions(+) create mode 100644 utilities/Ionic_Radii create mode 100644 utilities/Readme create mode 100644 utilities/SISSO_predict.f90 create mode 100644 utilities/SISSO_predict_para create mode 100644 utilities/SVC.py create mode 100644 utilities/VarSelect_SISSO.py create mode 100644 utilities/af2traindat.f90 create mode 100644 utilities/atom_features create mode 100644 utilities/k-fold-cv.f90 create mode 100644 utilities/leave-percent-out-cv.f90 create mode 100644 utilities/samplelist diff --git a/utilities/Ionic_Radii b/utilities/Ionic_Radii new file mode 100644 index 0000000..5110400 --- /dev/null +++ b/utilities/Ionic_Radii @@ -0,0 +1,1212 @@ +ions r_eff Sources +Li1+_06 0.76 Shannon +Li1+_04 0.59 Shannon +Li1+_08 0.92 Shannon +Be2+_03 0.16 Shannon +Be2+_04 0.27 Shannon +Be2+_06 0.45 Shannon +B_3+_06 0.27 Shannon +B_3+_03 0.01 Shannon +B_3+_04 0.11 Shannon +C_4+_04 0.15 Shannon +C_4+_06 0.16 Shannon +N_3-_04 1.46 Shannon +N_3+_06 0.16 Shannon +N_5+_06 0.13 Shannon +O_2-_06 1.40 Shannon +O_2-_02 1.35 Shannon +O_2-_03 1.36 Shannon +O_2-_04 1.38 Shannon +O_2-_08 1.42 Shannon +F_1-_06 1.33 Shannon +F_1-_02 1.29 Shannon +F_1-_03 1.30 Shannon +F_1-_04 1.31 Shannon +F_7+_06 0.08 Shannon +Na1+_06 1.02 Shannon +Na1+_04 0.99 Shannon +Na1+_05 1.00 Shannon +Na1+_07 1.12 Shannon +Na1+_08 1.18 Shannon +Na1+_09 1.24 Shannon +Na1+_12 1.39 Shannon +Mg2+_06 0.72 Shannon +Mg2+_04 
0.57 Shannon +Mg2+_05 0.66 Shannon +Mg2+_08 0.89 Shannon +Al3+_06 0.54 Shannon +Al3+_04 0.39 Shannon +Al3+_05 0.48 Shannon +Si4+_06 0.40 Shannon +Si4+_04 0.26 Shannon +P_3+_06 0.44 Shannon +P_3+_04 0.17 Shannon +P_3+_05 0.29 Shannon +P_5+_06 0.38 Shannon +S_2-_06 1.84 Shannon +S_4+_06 0.37 Shannon +S_6+_06 0.29 Shannon +S_6+_04 0.12 Shannon +Cl1-_06 1.81 Shannon +Cl5+_03 0.12 Shannon +Cl7+_04 0.08 Shannon +Cl7+_06 0.27 Shannon +K_1+_06 1.38 Shannon +K_1+_04 1.37 Shannon +K_1+_07 1.46 Shannon +K_1+_08 1.51 Shannon +K_1+_09 1.55 Shannon +K_1+_10 1.59 Shannon +K_1+_12 1.64 Shannon +Ca2+_06 1.00 Shannon +Ca2+_07 1.06 Shannon +Ca2+_08 1.12 Shannon +Ca2+_09 1.18 Shannon +Ca2+_10 1.23 Shannon +Ca2+_12 1.34 Shannon +Sc3+_06 0.75 Shannon +Sc3+_08 0.87 Shannon +Ti2+_06 0.86 Shannon +Ti3+_06 0.67 Shannon +Ti4+_06 0.61 Shannon +Ti4+_04 0.42 Shannon +Ti4+_05 0.51 Shannon +Ti4+_08 0.74 Shannon +V_2+_06 0.79 Shannon +V_3+_06 0.64 Shannon +V_4+_06 0.58 Shannon +V_4+_05 0.53 Shannon +V_4+_08 0.72 Shannon +V_5+_04 0.36 Shannon +V_5+_05 0.46 Shannon +V_5+_06 0.54 Shannon +Cr2+_06 0.80 Shannon +Cr3+_06 0.62 Shannon +Cr4+_06 0.55 Shannon +Cr4+_04 0.41 Shannon +Cr5+_06 0.49 Shannon +Cr5+_04 0.35 Shannon +Cr5+_08 0.57 Shannon +Cr6+_04 0.26 Shannon +Cr6+_06 0.44 Shannon +Mn2+_06 0.83 Shannon +Mn2+_04 0.66 Shannon +Mn2+_05 0.75 Shannon +Mn2+_07 0.90 Shannon +Mn2+_08 0.96 Shannon +Mn3+_05 0.72 Shannon +Mn3+_06 0.65 Shannon +Mn4+_04 0.39 Shannon +Mn4+_06 0.53 Shannon +Mn5+_04 0.33 Shannon +Mn6+_04 0.26 Shannon +Mn7+_04 0.25 Shannon +Mn7+_06 0.46 Shannon +Fe2+_06 0.78 Shannon +Fe2+_04 0.63 Shannon +Fe2+_08 0.92 Shannon +Fe3+_06 0.65 Shannon +Fe3+_04 0.49 Shannon +Fe3+_05 0.58 Shannon +Fe3+_08 0.78 Shannon +Fe4+_06 0.59 Shannon +Fe6+_04 0.25 Shannon +Co2+_06 0.75 Shannon +Co2+_04 0.58 Shannon +Co2+_05 0.67 Shannon +Co2+_08 0.90 Shannon +Co3+_06 0.61 Shannon +Co4+_06 0.53 Shannon +Co4+_04 0.40 Shannon +Ni2+_04 0.55 Shannon +Ni2+_05 0.63 Shannon +Ni2+_06 0.69 Shannon +Ni3+_06 0.60 Shannon 
+Ni4+_06 0.48 Shannon +Cu1+_02 0.46 Shannon +Cu1+_04 0.60 Shannon +Cu1+_06 0.77 Shannon +Cu2+_04 0.57 Shannon +Cu2+_05 0.65 Shannon +Cu2+_06 0.73 Shannon +Cu3+_06 0.54 Shannon +Zn2+_06 0.74 Shannon +Zn2+_04 0.60 Shannon +Zn2+_05 0.68 Shannon +Zn2+_08 0.90 Shannon +Ga3+_04 0.47 Shannon +Ga3+_05 0.55 Shannon +Ga3+_06 0.62 Shannon +Ge2+_06 0.73 Shannon +Ge4+_06 0.53 Shannon +Ge4+_04 0.39 Shannon +As3+_06 0.58 Shannon +As5+_06 0.46 Shannon +As5+_04 0.34 Shannon +Se2-_06 1.98 Shannon +Se4+_06 0.50 Shannon +Se6+_06 0.42 Shannon +Se6+_04 0.28 Shannon +Br1-_06 1.96 Shannon +Br3+_04 0.59 Shannon +Br5+_03 0.31 Shannon +Br7+_04 0.25 Shannon +Br7+_06 0.39 Shannon +Rb1+_06 1.52 Shannon +Rb1+_07 1.56 Shannon +Rb1+_08 1.61 Shannon +Rb1+_09 1.63 Shannon +Rb1+_10 1.66 Shannon +Rb1+_11 1.69 Shannon +Rb1+_14 1.83 Shannon +Rb1+_12 1.72 Shannon +Sr2+_06 1.18 Shannon +Sr2+_07 1.21 Shannon +Sr2+_08 1.26 Shannon +Sr2+_09 1.31 Shannon +Sr2+_10 1.36 Shannon +Sr2+_12 1.44 Shannon +Y_3+_06 0.90 Shannon +Y_3+_07 0.96 Shannon +Y_3+_08 1.02 Shannon +Y_3+_09 1.08 Shannon +Zr4+_06 0.72 Shannon +Zr4+_04 0.59 Shannon +Zr4+_05 0.66 Shannon +Zr4+_07 0.78 Shannon +Zr4+_08 0.84 Shannon +Zr4+_09 0.89 Shannon +Nb3+_06 0.72 Shannon +Nb4+_06 0.68 Shannon +Nb4+_08 0.79 Shannon +Nb5+_06 0.64 Shannon +Nb5+_04 0.48 Shannon +Nb5+_07 0.69 Shannon +Nb5+_08 0.74 Shannon +Mo3+_06 0.69 Shannon +Mo4+_06 0.65 Shannon +Mo5+_06 0.61 Shannon +Mo5+_04 0.46 Shannon +Mo6+_06 0.59 Shannon +Mo6+_04 0.41 Shannon +Mo6+_05 0.50 Shannon +Mo6+_07 0.73 Shannon +Tc4+_06 0.65 Shannon +Tc5+_06 0.60 Shannon +Tc7+_06 0.56 Shannon +Tc7+_04 0.37 Shannon +Ru3+_06 0.68 Shannon +Ru4+_06 0.62 Shannon +Ru5+_06 0.57 Shannon +Ru7+_04 0.38 Shannon +Ru8+_04 0.36 Shannon +Rh3+_06 0.67 Shannon +Rh4+_06 0.60 Shannon +Rh5+_06 0.55 Shannon +Pd1+_02 0.59 Shannon +Pd2+_06 0.86 Shannon +Pd3+_06 0.76 Shannon +Pd4+_06 0.62 Shannon +Ag1+_06 1.15 Shannon +Ag1+_02 0.67 Shannon +Ag1+_05 1.09 Shannon +Ag1+_07 1.22 Shannon +Ag1+_08 1.28 Shannon +Ag2+_06 0.94 
Shannon +Ag3+_06 0.75 Shannon +Cd2+_06 0.95 Shannon +Cd2+_04 0.78 Shannon +Cd2+_05 0.87 Shannon +Cd2+_07 1.03 Shannon +Cd2+_08 1.10 Shannon +Cd2+_12 1.31 Shannon +In3+_06 0.80 Shannon +In3+_04 0.62 Shannon +In3+_08 0.92 Shannon +Sn4+_06 0.69 Shannon +Sn4+_04 0.55 Shannon +Sn4+_05 0.62 Shannon +Sn4+_07 0.75 Shannon +Sn4+_08 0.81 Shannon +Sb3+_06 0.76 Shannon +Sb3+_05 0.80 Shannon +Sb5+_06 0.60 Shannon +Te2-_06 2.21 Shannon +Te4+_06 0.97 Shannon +Te4+_03 0.52 Shannon +Te4+_04 0.66 Shannon +Te6+_06 0.56 Shannon +Te6+_04 0.43 Shannon +I_1-_06 2.20 Shannon +I_5+_03 0.44 Shannon +I_5+_06 0.95 Shannon +I_7+_06 0.53 Shannon +I_7+_04 0.42 Shannon +Cs1+_06 1.67 Shannon +Cs1+_08 1.74 Shannon +Cs1+_09 1.78 Shannon +Cs1+_10 1.81 Shannon +Cs1+_11 1.85 Shannon +Cs1+_12 1.88 Shannon +Ba2+_06 1.35 Shannon +Ba2+_07 1.38 Shannon +Ba2+_08 1.42 Shannon +Ba2+_09 1.47 Shannon +Ba2+_10 1.52 Shannon +Ba2+_11 1.57 Shannon +Ba2+_12 1.61 Shannon +La3+_06 1.03 Shannon +La3+_07 1.10 Shannon +La3+_08 1.16 Shannon +La3+_09 1.22 Shannon +La3+_10 1.27 Shannon +La3+_12 1.36 Shannon +Ce3+_06 1.01 Shannon +Ce3+_07 1.07 Shannon +Ce3+_08 1.14 Shannon +Ce3+_09 1.20 Shannon +Ce3+_10 1.25 Shannon +Ce3+_12 1.34 Shannon +Ce4+_06 0.87 Shannon +Ce4+_08 0.97 Shannon +Ce4+_10 1.07 Shannon +Ce4+_12 1.14 Shannon +Pr3+_06 0.99 Shannon +Pr3+_08 1.13 Shannon +Pr3+_09 1.18 Shannon +Pr4+_06 0.85 Shannon +Pr4+_08 0.96 Shannon +Nd2+_08 1.29 Shannon +Nd2+_09 1.35 Shannon +Nd3+_06 0.98 Shannon +Nd3+_08 1.11 Shannon +Nd3+_09 1.16 Shannon +Nd3+_12 1.27 Shannon +Pm3+_06 0.97 Shannon +Pm3+_08 1.09 Shannon +Pm3+_09 1.14 Shannon +Sm2+_07 1.22 Shannon +Sm2+_08 1.27 Shannon +Sm2+_09 1.32 Shannon +Sm3+_06 0.96 Shannon +Sm3+_07 1.02 Shannon +Sm3+_08 1.08 Shannon +Sm3+_09 1.13 Shannon +Sm3+_12 1.24 Shannon +Eu2+_06 1.17 Shannon +Eu2+_07 1.20 Shannon +Eu2+_08 1.25 Shannon +Eu2+_09 1.30 Shannon +Eu2+_10 1.35 Shannon +Eu3+_06 0.95 Shannon +Eu3+_07 1.01 Shannon +Eu3+_08 1.07 Shannon +Eu3+_09 1.12 Shannon +Gd3+_06 0.94 Shannon +Tb3+_06 
0.92 Shannon +Tb3+_07 0.98 Shannon +Tb3+_08 1.04 Shannon +Tb3+_09 1.10 Shannon +Tb4+_06 0.76 Shannon +Tb4+_08 0.88 Shannon +Dy2+_06 1.07 Shannon +Dy2+_07 1.13 Shannon +Dy2+_08 1.19 Shannon +Dy3+_06 0.91 Shannon +Dy3+_07 0.97 Shannon +Dy3+_08 1.03 Shannon +Dy3+_09 1.08 Shannon +Ho3+_06 0.90 Shannon +Ho3+_08 1.02 Shannon +Ho3+_09 1.07 Shannon +Ho3+_10 1.12 Shannon +Er3+_06 0.89 Shannon +Er3+_07 0.95 Shannon +Er3+_08 1.00 Shannon +Er3+_09 1.06 Shannon +Tm2+_06 1.03 Shannon +Tm2+_07 1.09 Shannon +Tm3+_06 0.88 Shannon +Tm3+_08 0.99 Shannon +Tm3+_09 1.05 Shannon +Yb2+_06 1.02 Shannon +Yb2+_07 1.08 Shannon +Yb2+_08 1.14 Shannon +Yb3+_06 0.87 Shannon +Yb3+_07 0.93 Shannon +Yb3+_08 0.99 Shannon +Yb3+_09 1.04 Shannon +Lu3+_06 0.86 Shannon +Lu3+_08 0.98 Shannon +Lu3+_09 1.03 Shannon +Hf4+_06 0.71 Shannon +Hf4+_04 0.58 Shannon +Hf4+_07 0.76 Shannon +Hf4+_08 0.83 Shannon +Ta3+_06 0.72 Shannon +Ta4+_06 0.68 Shannon +Ta5+_06 0.64 Shannon +Ta5+_07 0.69 Shannon +Ta5+_08 0.74 Shannon +W_4+_06 0.66 Shannon +W_5+_06 0.62 Shannon +W_6+_04 0.42 Shannon +W_6+_05 0.51 Shannon +W_6+_06 0.60 Shannon +Re4+_06 0.63 Shannon +Re5+_06 0.58 Shannon +Re6+_06 0.55 Shannon +Re7+_06 0.53 Shannon +Re7+_04 0.38 Shannon +Os4+_06 0.63 Shannon +Os5+_06 0.58 Shannon +Os6+_06 0.55 Shannon +Os6+_05 0.49 Shannon +Os7+_06 0.53 Shannon +Os8+_04 0.39 Shannon +Ir3+_06 0.68 Shannon +Ir4+_06 0.63 Shannon +Ir5+_06 0.57 Shannon +Pt2+_06 0.80 Shannon +Pt4+_06 0.63 Shannon +Pt5+_06 0.57 Shannon +Au1+_06 1.37 Shannon +Au3+_06 0.85 Shannon +Au5+_06 0.57 Shannon +Hg1+_03 0.97 Shannon +Hg1+_06 1.19 Shannon +Hg2+_06 1.02 Shannon +Hg2+_02 0.69 Shannon +Hg2+_04 0.96 Shannon +Hg2+_08 1.14 Shannon +Tl1+_06 1.50 Shannon +Tl1+_08 1.59 Shannon +Tl1+_12 1.70 Shannon +Tl3+_06 0.89 Shannon +Tl3+_04 0.75 Shannon +Tl3+_08 0.98 Shannon +Pb2+_06 1.19 Shannon +Pb2+_07 1.23 Shannon +Pb2+_08 1.29 Shannon +Pb2+_09 1.35 Shannon +Pb2+_10 1.40 Shannon +Pb2+_11 1.45 Shannon +Pb2+_12 1.49 Shannon +Pb4+_06 0.78 Shannon +Pb4+_04 0.65 Shannon 
+Pb4+_05 0.73 Shannon +Pb4+_08 0.94 Shannon +Bi3+_06 1.03 Shannon +Bi3+_05 0.96 Shannon +Bi3+_08 1.17 Shannon +Bi5+_06 0.76 Shannon +Po4+_06 0.94 Shannon +Po4+_08 1.08 Shannon +Po6+_06 0.67 Shannon +At7+_06 0.62 Shannon +Fr1+_06 1.80 Shannon +Ra2+_08 1.48 Shannon +Ra2+_12 1.70 Shannon +Ac3+_06 1.12 Shannon +Th4+_06 0.94 Shannon +Th4+_08 1.05 Shannon +Th4+_09 1.09 Shannon +Th4+_10 1.13 Shannon +Th4+_11 1.18 Shannon +Th4+_12 1.21 Shannon +Pa3+_06 1.04 Shannon +Pa4+_06 0.90 Shannon +Pa4+_08 1.01 Shannon +Pa5+_06 0.78 Shannon +Pa5+_08 0.91 Shannon +Pa5+_09 0.95 Shannon +U_3+_06 1.03 Shannon +U_4+_06 0.89 Shannon +U_4+_07 0.95 Shannon +U_4+_08 1.00 Shannon +U_4+_09 1.05 Shannon +U_4+_12 1.17 Shannon +U_5+_06 0.76 Shannon +U_5+_07 0.84 Shannon +U_6+_06 0.73 Shannon +U_6+_02 0.45 Shannon +U_6+_04 0.52 Shannon +U_6+_07 0.81 Shannon +U_6+_08 0.86 Shannon +Np2+_06 1.10 Shannon +Np3+_06 1.01 Shannon +Np4+_06 0.87 Shannon +Np4+_08 0.98 Shannon +Np5+_06 0.75 Shannon +Np6+_06 0.72 Shannon +Np7+_06 0.71 Shannon +Pu3+_06 1.00 Shannon +Pu4+_06 0.86 Shannon +Pu4+_08 0.96 Shannon +Pu5+_06 0.74 Shannon +Pu6+_06 0.71 Shannon +Am2+_08 1.26 Shannon +Am2+_07 1.21 Shannon +Am2+_09 1.31 Shannon +Am3+_08 1.09 Shannon +Am3+_06 0.98 Shannon +Am4+_08 0.95 Shannon +Am4+_06 0.85 Shannon +Cm3+_06 0.97 Shannon +Cm4+_06 0.85 Shannon +Cm4+_08 0.95 Shannon +Bk3+_06 0.96 Shannon +Bk4+_06 0.83 Shannon +Bk4+_08 0.93 Shannon +Cf3+_06 0.95 Shannon +Cf4+_06 0.82 Shannon +Cf4+_08 0.92 Shannon +Li1+_12 1.1855 Prediction +Be2+_12 0.9352 Prediction +B_3+_12 0.6854 Prediction +C_4+_12 0.4245 Prediction +N_3+_12 0.1916 Prediction +N_5+_12 0.2237 Prediction +Mg2+_12 1.1495 Prediction +Al3+_12 0.9762 Prediction +Si4+_12 0.7973 Prediction +P_3+_12 0.7985 Prediction +P_5+_12 0.7580 Prediction +S_4+_12 0.6448 Prediction +S_6+_12 0.5450 Prediction +Cl7+_12 0.4257 Prediction +Sc3+_12 1.1544 Prediction +Ti2+_12 1.2685 Prediction +Ti3+_12 1.0712 Prediction +Ti4+_12 1.0193 Prediction +V_2+_12 1.1952 Prediction +V_3+_12 
1.0375 Prediction +V_4+_12 0.9844 Prediction +V_5+_12 0.9503 Prediction +Cr2+_12 1.2182 Prediction +Cr3+_12 1.0092 Prediction +Cr4+_12 0.9411 Prediction +Cr5+_12 0.8793 Prediction +Cr6+_12 0.8269 Prediction +Mn2+_12 1.2616 Prediction +Mn3+_12 1.0468 Prediction +Mn4+_12 0.9067 Prediction +Mn7+_12 0.8611 Prediction +Fe2+_12 1.1847 Prediction +Fe3+_12 1.0418 Prediction +Fe4+_12 0.9867 Prediction +Co2+_12 1.1365 Prediction +Co3+_12 0.9755 Prediction +Co4+_12 0.8904 Prediction +Ni2+_12 1.0340 Prediction +Ni3+_12 0.9517 Prediction +Ni4+_12 0.8038 Prediction +Cu1+_12 1.0264 Prediction +Cu2+_12 1.0908 Prediction +Cu3+_12 0.8496 Prediction +Zn2+_12 1.0938 Prediction +Ga3+_12 0.9593 Prediction +Ge2+_12 1.0780 Prediction +Ge4+_12 0.8291 Prediction +As3+_12 0.8414 Prediction +As5+_12 0.7030 Prediction +Se4+_12 0.6792 Prediction +Se6+_12 0.5920 Prediction +Br7+_12 0.4829 Prediction +Y_3+_12 1.2464 Prediction +Zr2+_06 0.9762 Prediction +Zr2+_12 1.3083 Prediction +Zr4+_12 1.0773 Prediction +Nb3+_12 1.0514 Prediction +Nb4+_12 1.0296 Prediction +Nb5+_12 1.0010 Prediction +Mo3+_12 1.0126 Prediction +Mo4+_12 0.9919 Prediction +Mo5+_12 0.9637 Prediction +Mo6+_12 0.9546 Prediction +Tc4+_12 0.9876 Prediction +Tc5+_12 0.9474 Prediction +Tc7+_12 0.9274 Prediction +Ru3+_12 0.9883 Prediction +Ru4+_12 0.9444 Prediction +Ru5+_12 0.9041 Prediction +Rh3+_12 0.9696 Prediction +Rh4+_12 0.9130 Prediction +Rh5+_12 0.8729 Prediction +Pd2+_12 1.1580 Prediction +Pd3+_12 1.0824 Prediction +Pd4+_12 0.9414 Prediction +Ag1+_12 1.4287 Prediction +Ag2+_12 1.2598 Prediction +Ag3+_12 1.0662 Prediction +In3+_12 1.1253 Prediction +Sn4+_12 1.0047 Prediction +Sb3+_12 1.0615 Prediction +Sb5+_12 0.8856 Prediction +Te4+_12 1.5954 Prediction +Te6+_12 0.8236 Prediction +I_5+_12 1.7278 Prediction +I_7+_12 0.7646 Prediction +Pr3+_12 1.2910 Prediction +Pr4+_12 1.1672 Prediction +Pm3+_12 1.2664 Prediction +Eu2+_12 1.4285 Prediction +Eu3+_12 1.2385 Prediction +Gd3+_12 1.2313 Prediction +Tb3+_12 1.2078 Prediction +Tb4+_12 
1.0628 Prediction +Dy2+_12 1.3262 Prediction +Dy3+_12 1.1952 Prediction +Ho3+_12 1.1827 Prediction +Er3+_12 1.1702 Prediction +Tm2+_12 1.2774 Prediction +Tm3+_12 1.1576 Prediction +Yb2+_12 1.2644 Prediction +Yb3+_12 1.1458 Prediction +Lu3+_12 1.1317 Prediction +Hf4+_12 0.9874 Prediction +Ta3+_12 0.9509 Prediction +Ta4+_12 0.9394 Prediction +Ta5+_12 0.9174 Prediction +W_4+_12 0.9024 Prediction +W_5+_12 0.8813 Prediction +W_6+_12 0.8785 Prediction +Re4+_12 0.8507 Prediction +Re5+_12 0.8176 Prediction +Re6+_12 0.8031 Prediction +Re7+_12 0.7967 Prediction +Os4+_12 0.8358 Prediction +Os5+_12 0.8029 Prediction +Os6+_12 0.7891 Prediction +Os7+_12 0.7835 Prediction +Ir3+_12 0.8446 Prediction +Ir4+_12 0.8262 Prediction +Ir5+_12 0.7799 Prediction +Pt2+_12 0.9520 Prediction +Pt4+_12 0.8459 Prediction +Pt5+_12 0.8005 Prediction +Au1+_12 1.5697 Prediction +Au3+_12 1.0865 Prediction +Au5+_12 0.8020 Prediction +Hg1+_12 1.3209 Prediction +Hg2+_12 1.2370 Prediction +Tl3+_12 1.1319 Prediction +Pb4+_12 1.0207 Prediction +Bi3+_12 1.3493 Prediction +Bi5+_12 1.0204 Prediction +Po4+_12 1.2735 Prediction +Po6+_12 0.9011 Prediction +At7+_12 0.8245 Prediction +Fr1+_12 1.9233 Prediction +Ac3+_12 1.3682 Prediction +Pa3+_12 1.2876 Prediction +Pa4+_12 1.1665 Prediction +Pa5+_12 1.0586 Prediction +U_3+_12 1.2763 Prediction +U_5+_12 1.0297 Prediction +U_6+_12 1.0147 Prediction +Np2+_12 1.2987 Prediction +Np3+_12 1.2515 Prediction +Np4+_12 1.1275 Prediction +Np5+_12 1.0166 Prediction +Np6+_12 1.0018 Prediction +Np7+_12 1.0057 Prediction +Pu3+_12 1.2381 Prediction +Pu4+_12 1.1143 Prediction +Pu5+_12 1.0036 Prediction +Pu6+_12 0.9888 Prediction +Am3+_12 1.2138 Prediction +Am4+_12 1.1024 Prediction +Cm3+_12 1.2007 Prediction +Cm4+_12 1.1006 Prediction +Bk3+_12 1.1875 Prediction +Bk4+_12 1.0765 Prediction +Cf3+_12 1.1743 Prediction +Cf4+_12 1.0635 Prediction +Am2+_12 1.4104 Prediction +Cm2+_12 1.3918 Prediction +Pa2+_12 1.4277 Prediction +Pu2+_12 1.4397 Prediction +Ta2+_12 1.2124 Prediction +U_2+_12 
1.5088 Prediction +Sm2+_12 1.4215 Prediction +Gd2+_12 1.3975 Prediction +Ce2+_12 1.5073 Prediction +Er2+_12 1.3140 Prediction +Ho2+_12 1.3292 Prediction +Lu2+_12 1.2672 Prediction +Nd2+_12 1.4552 Prediction +Pr2+_12 1.4721 Prediction +Sc2+_12 1.2750 Prediction +Tb2+_12 1.3631 Prediction +Y_2+_12 1.4038 Prediction +Th2+_12 1.5626 Prediction +La2+_12 1.5397 Prediction +Th3+_12 1.3297 Prediction +Hf3+_12 1.0559 Prediction +Tc3+_12 1.0594 Prediction +Sn2+_12 1.4573 Prediction +Am2+_06 1.2029 Prediction +Cm2+_06 1.1884 Prediction +Pa2+_06 1.2137 Prediction +Pu2+_06 1.2256 Prediction +Ta2+_06 0.9824 Prediction +U_2+_06 1.2818 Prediction +Sm2+_06 1.1548 Prediction +Gd2+_06 1.1323 Prediction +Ce2+_06 1.2272 Prediction +Er2+_06 1.0624 Prediction +Ho2+_06 1.0750 Prediction +Lu2+_06 1.0245 Prediction +Nd2+_06 1.1830 Prediction +Pr2+_06 1.1971 Prediction +Sc2+_06 0.8806 Prediction +Tb2+_06 1.1028 Prediction +Y_2+_06 1.0762 Prediction +Th2+_06 1.3401 Prediction +La2+_06 1.2559 Prediction +Th3+_06 1.0786 Prediction +Hf3+_06 0.7977 Prediction +Tc3+_06 0.7323 Prediction +Sn2+_06 1.0394 Prediction +Cl5+_06 0.4280 Prediction +Mn5+_06 0.4860 Prediction +Mn6+_06 0.4100 Prediction +Fe6+_06 0.3980 Prediction +Br3+_06 0.7120 Prediction +Br5+_06 0.4790 Prediction +Ru7+_06 0.5250 Prediction +Ru8+_06 0.5070 Prediction +Pd1+_06 0.8670 Prediction +Os8+_06 0.5190 Prediction +Ra2+_06 1.3990 Prediction +Cl5+_12 0.7794 Prediction +Mn5+_12 0.8670 Prediction +Mn6+_12 0.7781 Prediction +Fe6+_12 0.7524 Prediction +Br3+_12 1.2533 Prediction +Br5+_12 0.6309 Prediction +Ru7+_12 0.8791 Prediction +Ru8+_12 0.8681 Prediction +Pd1+_12 1.0688 Prediction +Os8+_12 0.7864 Prediction +N_3+_04 0.0816 Prediction +N_5+_04 0.0416 Prediction +P_5+_04 0.2163 Prediction +S_4+_04 0.2196 Prediction +Cl5+_04 0.2358 Prediction +Ca2+_04 0.8608 Prediction +Sc3+_04 0.5972 Prediction +Ti2+_04 0.6984 Prediction +Ti3+_04 0.5141 Prediction +V_2+_04 0.6235 Prediction +V_3+_04 0.4801 Prediction +V_4+_04 0.4203 Prediction +Cr2+_04 
0.6235 Prediction +Cr3+_04 0.4578 Prediction +Mn3+_04 0.4829 Prediction +Fe4+_04 0.4239 Prediction +Co3+_04 0.4473 Prediction +Ni3+_04 0.4387 Prediction +Ni4+_04 0.3320 Prediction +Cu3+_04 0.3919 Prediction +Ge2+_04 0.5421 Prediction +As3+_04 0.4172 Prediction +Se4+_04 0.3516 Prediction +Br5+_04 0.3268 Prediction +Rb1+_04 1.4219 Prediction +Sr2+_04 1.0581 Prediction +Y_3+_04 0.7636 Prediction +Nb3+_04 0.5832 Prediction +Nb4+_04 0.5395 Prediction +Mo3+_04 0.5527 Prediction +Mo4+_04 0.5090 Prediction +Tc4+_04 0.5075 Prediction +Tc5+_04 0.4564 Prediction +Ru3+_04 0.5408 Prediction +Ru4+_04 0.4788 Prediction +Ru5+_04 0.4280 Prediction +Rh3+_04 0.5318 Prediction +Rh4+_04 0.4608 Prediction +Rh5+_04 0.4100 Prediction +Pd1+_04 0.7540 Prediction +Pd2+_04 0.7222 Prediction +Pd3+_04 0.6189 Prediction +Pd4+_04 0.4823 Prediction +Ag1+_04 1.0029 Prediction +Ag2+_04 0.7903 Prediction +Ag3+_04 0.6070 Prediction +Sb3+_04 0.5951 Prediction +Sb5+_04 0.4505 Prediction +I_5+_04 0.6091 Prediction +Cs1+_04 1.5869 Prediction +Ba2+_04 1.2398 Prediction +La3+_04 0.9034 Prediction +Ce3+_04 0.8842 Prediction +Ce4+_04 0.7412 Prediction +Pr3+_04 0.8657 Prediction +Pr4+_04 0.7225 Prediction +Nd2+_04 1.0716 Prediction +Nd3+_04 0.8563 Prediction +Pm3+_04 0.8469 Prediction +Sm2+_04 1.0528 Prediction +Sm3+_04 0.8375 Prediction +Eu2+_04 1.0586 Prediction +Eu3+_04 0.8318 Prediction +Gd3+_04 0.8161 Prediction +Tb3+_04 0.7975 Prediction +Tb4+_04 0.6350 Prediction +Dy2+_04 0.9540 Prediction +Dy3+_04 0.7882 Prediction +Ho3+_04 0.7788 Prediction +Er3+_04 0.7690 Prediction +Tm2+_04 0.9157 Prediction +Tm3+_04 0.7596 Prediction +Yb2+_04 0.9093 Prediction +Yb3+_04 0.7525 Prediction +Lu3+_04 0.7396 Prediction +Ta3+_04 0.6019 Prediction +Ta4+_04 0.5562 Prediction +Ta5+_04 0.5128 Prediction +W_4+_04 0.5375 Prediction +W_5+_04 0.4942 Prediction +Re4+_04 0.5108 Prediction +Re5+_04 0.4585 Prediction +Re6+_04 0.4258 Prediction +Os4+_04 0.5116 Prediction +Os5+_04 0.4595 Prediction +Os6+_04 0.4268 Prediction +Os7+_04 
0.4042 Prediction +Ir3+_04 0.5678 Prediction +Ir4+_04 0.5125 Prediction +Ir5+_04 0.4517 Prediction +Pt2+_04 0.6923 Prediction +Pt4+_04 0.5128 Prediction +Pt5+_04 0.4511 Prediction +Au1+_04 1.2331 Prediction +Au3+_04 0.7224 Prediction +Au5+_04 0.4515 Prediction +Hg1+_04 1.0736 Prediction +Tl1+_04 1.3436 Prediction +Pb2+_04 1.0230 Prediction +Bi3+_04 0.8545 Prediction +Bi5+_04 0.6149 Prediction +Po4+_04 0.7587 Prediction +Po6+_04 0.5317 Prediction +At7+_04 0.4836 Prediction +Fr1+_04 1.7318 Prediction +Ra2+_04 1.3026 Prediction +Ac3+_04 1.0116 Prediction +Th4+_04 0.8208 Prediction +Pa3+_04 0.9237 Prediction +Pa4+_04 0.7805 Prediction +Pa5+_04 0.6587 Prediction +U_3+_04 0.9091 Prediction +U_4+_04 0.7674 Prediction +U_5+_04 0.6374 Prediction +Np2+_04 0.9891 Prediction +Np3+_04 0.8905 Prediction +Np4+_04 0.7487 Prediction +Np5+_04 0.6282 Prediction +Np6+_04 0.5951 Prediction +Np7+_04 0.5820 Prediction +Pu3+_04 0.8814 Prediction +Pu4+_04 0.7396 Prediction +Pu5+_04 0.6190 Prediction +Pu6+_04 0.5859 Prediction +Am2+_04 1.0585 Prediction +Am3+_04 0.8639 Prediction +Am4+_04 0.7312 Prediction +Cm3+_04 0.8547 Prediction +Cm4+_04 0.7316 Prediction +Bk3+_04 0.8456 Prediction +Bk4+_04 0.7128 Prediction +Cf3+_04 0.8364 Prediction +Cf4+_04 0.7036 Prediction +Zr2+_04 0.8398 Prediction +Cm2+_04 1.0782 Prediction +Pa2+_04 1.1001 Prediction +Pu2+_04 1.1135 Prediction +Ta2+_04 0.8565 Prediction +U_2+_04 1.1600 Prediction +Gd2+_04 1.0110 Prediction +Ce2+_04 1.1089 Prediction +Er2+_04 0.9445 Prediction +Ho2+_04 0.9545 Prediction +Lu2+_04 0.9051 Prediction +Pr2+_04 1.0809 Prediction +Sc2+_04 0.7280 Prediction +Tb2+_04 0.9829 Prediction +Y_2+_04 0.9468 Prediction +Th2+_04 1.2294 Prediction +La2+_04 1.1379 Prediction +Th3+_04 0.9645 Prediction +Hf3+_04 0.6789 Prediction +Tc3+_04 0.5882 Prediction +Sn2+_04 0.8354 Prediction +Be2+_08 0.6173 Prediction +B_3+_08 0.4186 Prediction +C_4+_08 0.2638 Prediction +N_3+_08 0.1985 Prediction +N_5+_08 0.1848 Prediction +Al3+_08 0.6941 Prediction +Si4+_08 
0.5440 Prediction +P_3+_08 0.5779 Prediction +P_5+_08 0.5215 Prediction +S_4+_08 0.4859 Prediction +S_6+_08 0.3962 Prediction +Cl5+_08 0.5797 Prediction +Cl7+_08 0.3498 Prediction +Ti2+_08 1.0066 Prediction +Ti3+_08 0.8129 Prediction +V_2+_08 0.9380 Prediction +V_3+_08 0.7838 Prediction +V_5+_08 0.6863 Prediction +Cr2+_08 0.9547 Prediction +Cr3+_08 0.7631 Prediction +Cr4+_08 0.6925 Prediction +Cr6+_08 0.5796 Prediction +Mn3+_08 0.7966 Prediction +Mn4+_08 0.6686 Prediction +Mn5+_08 0.6296 Prediction +Mn6+_08 0.5441 Prediction +Mn7+_08 0.6045 Prediction +Fe4+_08 0.7362 Prediction +Fe6+_08 0.5306 Prediction +Co3+_08 0.7487 Prediction +Co4+_08 0.6654 Prediction +Ni2+_08 0.8255 Prediction +Ni3+_08 0.7354 Prediction +Ni4+_08 0.6044 Prediction +Cu1+_08 0.8822 Prediction +Cu2+_08 0.8714 Prediction +Cu3+_08 0.6617 Prediction +Ga3+_08 0.7538 Prediction +Ge2+_08 0.8756 Prediction +Ge4+_08 0.6532 Prediction +As3+_08 0.6983 Prediction +As5+_08 0.5673 Prediction +Se4+_08 0.5963 Prediction +Se6+_08 0.5093 Prediction +Br3+_08 0.9382 Prediction +Br5+_08 0.5733 Prediction +Br7+_08 0.4585 Prediction +Nb3+_08 0.8413 Prediction +Mo3+_08 0.8098 Prediction +Mo4+_08 0.7751 Prediction +Mo5+_08 0.7383 Prediction +Mo6+_08 0.7213 Prediction +Tc4+_08 0.7749 Prediction +Tc5+_08 0.7273 Prediction +Tc7+_08 0.6927 Prediction +Ru3+_08 0.7978 Prediction +Ru4+_08 0.7417 Prediction +Ru5+_08 0.6940 Prediction +Ru7+_08 0.6598 Prediction +Ru8+_08 0.6414 Prediction +Rh3+_08 0.7857 Prediction +Rh4+_08 0.7187 Prediction +Rh5+_08 0.6710 Prediction +Pd1+_08 0.9575 Prediction +Pd2+_08 0.9752 Prediction +Pd3+_08 0.8813 Prediction +Pd4+_08 0.7397 Prediction +Ag2+_08 1.0644 Prediction +Ag3+_08 0.8709 Prediction +Sb3+_08 0.8870 Prediction +Sb5+_08 0.7176 Prediction +Te4+_08 1.2082 Prediction +Te6+_08 0.6738 Prediction +I_5+_08 1.2429 Prediction +I_7+_08 0.6383 Prediction +Gd3+_08 1.0482 Prediction +Tm2+_08 1.1256 Prediction +Ta3+_08 0.8139 Prediction +Ta4+_08 0.7819 Prediction +W_4+_08 0.7580 Prediction +W_5+_08 
0.7231 Prediction +W_6+_08 0.7079 Prediction +Re4+_08 0.7224 Prediction +Re5+_08 0.6766 Prediction +Re6+_08 0.6508 Prediction +Re7+_08 0.6345 Prediction +Os4+_08 0.7191 Prediction +Os5+_08 0.6734 Prediction +Os6+_08 0.6476 Prediction +Os7+_08 0.6315 Prediction +Os8+_08 0.6255 Prediction +Ir3+_08 0.7585 Prediction +Ir4+_08 0.7169 Prediction +Ir5+_08 0.6599 Prediction +Pt2+_08 0.8742 Prediction +Pt4+_08 0.7206 Prediction +Pt5+_08 0.6642 Prediction +Au1+_08 1.4656 Prediction +Au3+_08 0.9489 Prediction +Au5+_08 0.6643 Prediction +Hg1+_08 1.2636 Prediction +Bi5+_08 0.8708 Prediction +Po6+_08 0.7723 Prediction +At7+_08 0.7163 Prediction +Fr1+_08 1.8522 Prediction +Ac3+_08 1.2133 Prediction +Pa3+_08 1.1364 Prediction +U_3+_08 1.1281 Prediction +U_5+_08 0.8634 Prediction +Np2+_08 1.1847 Prediction +Np3+_08 1.1066 Prediction +Np5+_08 0.8525 Prediction +Np6+_08 0.8267 Prediction +Np7+_08 0.8207 Prediction +Pu3+_08 1.0955 Prediction +Pu5+_08 0.8415 Prediction +Pu6+_08 0.8158 Prediction +Cm3+_08 1.0627 Prediction +Bk3+_08 1.0517 Prediction +Cf3+_08 1.0408 Prediction +Zr2+_08 1.1030 Prediction +Cm2+_08 1.2760 Prediction +Pa2+_08 1.2972 Prediction +Pu2+_08 1.3201 Prediction +Ta2+_08 1.0759 Prediction +U_2+_08 1.3739 Prediction +Gd2+_08 1.2310 Prediction +Ce2+_08 1.3348 Prediction +Er2+_08 1.1569 Prediction +Ho2+_08 1.1674 Prediction +Lu2+_08 1.1148 Prediction +Pr2+_08 1.3030 Prediction +Sc2+_08 1.0199 Prediction +Tb2+_08 1.1992 Prediction +Y_2+_08 1.1992 Prediction +Th2+_08 1.4292 Prediction +La2+_08 1.3660 Prediction +Th3+_08 1.1768 Prediction +Hf3+_08 0.9006 Prediction +Tc3+_08 0.8524 Prediction +Sn2+_08 1.2063 Prediction +Zr3+_06 0.8095 Prediction +W_3+_06 0.7479 Prediction +Zr3+_12 1.1556 Prediction +W_3+_12 0.9725 Prediction +Zr3+_04 0.6685 Prediction +W_3+_04 0.6272 Prediction +Zr3+_08 0.9354 Prediction +W_3+_08 0.8417 Prediction +Pm2+_04 1.0512 Prediction +Pm2+_06 1.1689 Prediction +Pm2+_08 1.2702 Prediction +Pm2+_12 1.4383 Prediction +Li1+_10 1.0461 Prediction +Be2+_10 
0.7786 Prediction +B_3+_10 0.5564 Prediction +C_4+_10 0.3509 Prediction +N_3+_10 0.2072 Prediction +N_5+_10 0.2145 Prediction +Na1+_10 1.2748 Prediction +Mg2+_10 1.0126 Prediction +Al3+_10 0.8389 Prediction +Si4+_10 0.6757 Prediction +P_3+_10 0.6962 Prediction +P_5+_10 0.6465 Prediction +S_4+_10 0.5758 Prediction +S_6+_10 0.4798 Prediction +Cl5+_10 0.6914 Prediction +Cl7+_10 0.3999 Prediction +Sc3+_10 1.0265 Prediction +Ti2+_10 1.1421 Prediction +Ti3+_10 0.9460 Prediction +Ti4+_10 0.8906 Prediction +V_2+_10 1.0722 Prediction +V_3+_10 0.9156 Prediction +V_4+_10 0.8592 Prediction +V_5+_10 0.8224 Prediction +Cr2+_10 1.0931 Prediction +Cr3+_10 0.8920 Prediction +Cr4+_10 0.8220 Prediction +Cr5+_10 0.7600 Prediction +Cr6+_10 0.7079 Prediction +Mn2+_10 1.1330 Prediction +Mn3+_10 0.9279 Prediction +Mn4+_10 0.7933 Prediction +Mn5+_10 0.7513 Prediction +Mn6+_10 0.6661 Prediction +Mn7+_10 0.7375 Prediction +Fe2+_10 1.0662 Prediction +Fe3+_10 0.9256 Prediction +Fe4+_10 0.8675 Prediction +Fe6+_10 0.6457 Prediction +Co2+_10 1.0256 Prediction +Co3+_10 0.8694 Prediction +Co4+_10 0.7846 Prediction +Ni2+_10 0.9387 Prediction +Ni3+_10 0.8514 Prediction +Ni4+_10 0.7112 Prediction +Cu1+_10 0.9658 Prediction +Cu2+_10 0.9903 Prediction +Cu3+_10 0.7637 Prediction +Zn2+_10 0.9940 Prediction +Ga3+_10 0.8655 Prediction +Ge2+_10 0.9897 Prediction +Ge4+_10 0.7514 Prediction +As3+_10 0.7834 Prediction +As5+_10 0.6465 Prediction +Se4+_10 0.6536 Prediction +Se6+_10 0.5645 Prediction +Br3+_10 1.1193 Prediction +Br5+_10 0.6195 Prediction +Br7+_10 0.4870 Prediction +Y_3+_10 1.1390 Prediction +Zr4+_10 0.9672 Prediction +Nb3+_10 0.9510 Prediction +Nb4+_10 0.9222 Prediction +Nb5+_10 0.8892 Prediction +Mo3+_10 0.9165 Prediction +Mo4+_10 0.8884 Prediction +Mo5+_10 0.8555 Prediction +Mo6+_10 0.8422 Prediction +Tc4+_10 0.8866 Prediction +Tc5+_10 0.8423 Prediction +Tc7+_10 0.8145 Prediction +Ru3+_10 0.8996 Prediction +Ru4+_10 0.8490 Prediction +Ru5+_10 0.8045 Prediction +Ru7+_10 0.7716 Prediction +Ru8+_10 
0.7578 Prediction +Rh3+_10 0.8845 Prediction +Rh4+_10 0.8221 Prediction +Rh5+_10 0.7777 Prediction +Pd1+_10 1.0202 Prediction +Pd2+_10 1.0735 Prediction +Pd3+_10 0.9879 Prediction +Pd4+_10 0.8460 Prediction +Ag1+_10 1.3566 Prediction +Ag2+_10 1.1698 Prediction +Ag3+_10 0.9753 Prediction +Cd2+_10 1.1830 Prediction +In3+_10 1.0331 Prediction +Sn4+_10 0.9197 Prediction +Sb3+_10 0.9858 Prediction +Sb5+_10 0.8113 Prediction +Te4+_10 1.4146 Prediction +Te6+_10 0.7599 Prediction +I_5+_10 1.4999 Prediction +I_7+_10 0.7145 Prediction +Pr3+_10 1.1999 Prediction +Pr4+_10 1.0698 Prediction +Nd2+_10 1.3751 Prediction +Nd3+_10 1.1884 Prediction +Pm3+_10 1.1769 Prediction +Sm2+_10 1.3434 Prediction +Sm3+_10 1.1655 Prediction +Eu3+_10 1.1508 Prediction +Gd3+_10 1.1445 Prediction +Tb3+_10 1.1220 Prediction +Tb4+_10 0.9711 Prediction +Dy2+_10 1.2526 Prediction +Dy3+_10 1.1104 Prediction +Er3+_10 1.0874 Prediction +Tm2+_10 1.2072 Prediction +Tm3+_10 1.0758 Prediction +Yb2+_10 1.1942 Prediction +Yb3+_10 1.0637 Prediction +Lu3+_10 1.0526 Prediction +Hf4+_10 0.9071 Prediction +Ta3+_10 0.8898 Prediction +Ta4+_10 0.8673 Prediction +Ta5+_10 0.8383 Prediction +W_4+_10 0.8376 Prediction +W_5+_10 0.8091 Prediction +W_6+_10 0.7997 Prediction +Re4+_10 0.7947 Prediction +Re5+_10 0.7547 Prediction +Re6+_10 0.7340 Prediction +Re7+_10 0.7224 Prediction +Os4+_10 0.7863 Prediction +Os5+_10 0.7464 Prediction +Os6+_10 0.7261 Prediction +Os7+_10 0.7149 Prediction +Os8+_10 0.7125 Prediction +Ir3+_10 0.8118 Prediction +Ir4+_10 0.7809 Prediction +Ir5+_10 0.7285 Prediction +Pt2+_10 0.9232 Prediction +Pt4+_10 0.7914 Prediction +Pt5+_10 0.7398 Prediction +Au1+_10 1.5302 Prediction +Au3+_10 1.0264 Prediction +Au5+_10 0.7405 Prediction +Hg1+_10 1.3052 Prediction +Hg2+_10 1.1869 Prediction +Tl1+_10 1.6946 Prediction +Tl3+_10 1.0708 Prediction +Pb4+_10 0.9611 Prediction +Bi3+_10 1.2694 Prediction +Bi5+_10 0.9560 Prediction +Po4+_10 1.1893 Prediction +Po6+_10 0.8476 Prediction +At7+_10 0.7826 Prediction +Fr1+_10 
1.8926 Prediction +Ra2+_10 1.5495 Prediction +Ac3+_10 1.2953 Prediction +Pa3+_10 1.2180 Prediction +Pa4+_10 1.0895 Prediction +Pa5+_10 0.9767 Prediction +U_3+_10 1.2091 Prediction +U_4+_10 1.0783 Prediction +U_5+_10 0.9524 Prediction +U_6+_10 0.9316 Prediction +Np2+_10 1.2496 Prediction +Np3+_10 1.1860 Prediction +Np4+_10 1.0553 Prediction +Np5+_10 0.9404 Prediction +Np6+_10 0.9198 Prediction +Np7+_10 0.9184 Prediction +Pu3+_10 1.1738 Prediction +Pu4+_10 1.0433 Prediction +Pu5+_10 0.9285 Prediction +Pu6+_10 0.9079 Prediction +Am2+_10 1.3580 Prediction +Am3+_10 1.1506 Prediction +Am4+_10 1.0316 Prediction +Cm3+_10 1.1385 Prediction +Cm4+_10 1.0305 Prediction +Bk3+_10 1.1265 Prediction +Bk4+_10 1.0078 Prediction +Cf3+_10 1.1145 Prediction +Cf4+_10 0.9959 Prediction +Zr2+_10 1.2089 Prediction +Cm2+_10 1.3409 Prediction +Pa2+_10 1.3712 Prediction +Pu2+_10 1.3856 Prediction +Ta2+_10 1.1538 Prediction +U_2+_10 1.4502 Prediction +Gd2+_10 1.3209 Prediction +Ce2+_10 1.4246 Prediction +Er2+_10 1.2423 Prediction +Ho2+_10 1.2563 Prediction +Lu2+_10 1.1994 Prediction +Pr2+_10 1.3910 Prediction +Sc2+_10 1.1514 Prediction +Tb2+_10 1.2880 Prediction +Y_2+_10 1.3038 Prediction +Th2+_10 1.5024 Prediction +La2+_10 1.4556 Prediction +Th3+_10 1.2582 Prediction +Hf3+_10 0.9832 Prediction +Tc3+_10 0.9630 Prediction +Sn2+_10 1.3431 Prediction +Zr3+_10 1.0501 Prediction +W_3+_10 0.9153 Prediction +Pm2+_10 1.3592 Prediction + + +Shannon: +1. Shannon's effective radii (high-spin data): Acta Cryst. (1976). A32, 751 + (http://abulafia.mt.ic.ac.uk/shannon/radius.php?orderby=Ion&dir=1) + +Prediction: +R. Ouyang, Chem. Mater. 32, 595 (2020). 
+(XXnn_mm - XX: element, nn: charge, mm: coordination number) + + diff --git a/utilities/Readme b/utilities/Readme new file mode 100644 index 0000000..bcd5c3f --- /dev/null +++ b/utilities/Readme @@ -0,0 +1,40 @@ + +Note: The tools may not be kept up to date with the SISSO code, and therefore you may find some of them not working because of format and parameter evolution in the SISSO input and output files. + +Usage of the tools (see also the SISSO_guide.pdf): + +- SISSO_predict.f90: read the models from SISSO.out, and make predictions on unknown/test materials. +1. Installation: ifort SISSO_predict.f90 -o ~/bin/SISSO_predict +2. Use: run the command 'SISSO_predict' in your working directory +3. Input files: predict.dat (containing the test materials and features, same format as train.dat); + input parameters can be provided either interactively or from a file named SISSO_predict_para + (see the provided template). +4. Output files: predict_X.out (descriptor coordinates for the materials in predict.dat); + predict_Y.out (predicted property for the materials in predict.dat). +5. Note: avoid using the operators as variable names; otherwise the translation from string to formula will fail. + + +- SVC.py: find the SVC hyperplane for classification based on the descriptor from SISSO + run 'python SVC.py >out' in your working directory where the file train.dat exists. + Please double-check and make necessary changes in SVC.py for your applications. + +- k-fold-cv & leave-percent-out-cv: creating sub-datasets for cv with SISSO +1. Define the input parameters in the file k-fold-cv.f90 (leave-percent-out-cv.f90) +2. Compile the program: ifort k-fold-cv.f90 -o kfoldcv (ifort leave-percent-out-cv.f90 -o lpocv) +3. With the all-data train.dat and corresponding SISSO.in in the working directory, run kfoldcv (or lpocv) to generate k folders +4. Double-check and submit the k SISSO jobs +5. 
Collect and analyze your cv results from the k folders + +- Ionic_Radii +The table of extended Shannon radii (R. Ouyang, Chem. Mater. 32, 595 (2020)). + +- af2traindat.f90: automatically creates the train.dat based on atomic features for the provided compounds +1. Prepare the two input files: atom_features (list of atomic features) and samplelist (list of training samples). + See the provided templates +2. Compilation: ifort af2traindat.f90 -o af2traindat +3. Use: ./af2traindat + +- VarSelect_SISSO.py: Variable selection assisted SISSO. +See the usage instructions in the VarSelect_SISSO.py program. + + diff --git a/utilities/SISSO_predict.f90 b/utilities/SISSO_predict.f90 new file mode 100644 index 0000000..123ad43 --- /dev/null +++ b/utilities/SISSO_predict.f90 @@ -0,0 +1,312 @@ +program SISSO_predict +! read the models from SISSO.out and make predictions on test data in predict.dat +! input files: predict.dat which has the same format as train.dat +! input files: SISSO_predict_para which contains the needed parameters for this program to run. +! Format: line1, number of samples in "predict.dat"; +! line2, number of features in "predict.dat"; +! line3, highest descriptor dimension in SISSO.out; +! line4, property type (1:cont., 2: categ.) +! If this file is missing, the code will ask for input interactively. +! output files: predict_X.out, data of the descriptor components +! predict_Y.out, data of predicted Y +! Note: please make sure that no operator symbols appear in the feature names. +! E.g.: if a feature is named 'a', then 'abs()' will be mistakenly translated as xxxbs(), where xxx is the feature value. 
+ +integer i,j,k,l,ndim,nd,nf,ns,ptype +character line*100000,pname*50 +character,allocatable:: desc(:)*500,allname(:)*50,afname(:)*50,mname(:)*50 +real*8 intercept,rmse,maxae +real*8,allocatable:: y(:),af(:,:),dat_desc(:,:),coeff(:),res(:) +logical fexist + + +inquire(file='SISSO_predict_para',exist=fexist) +if(fexist) then + open(1,file='SISSO_predict_para',status='old') + read(1,*) ns + read(1,*) nf + read(1,*) ndim + read(1,*) ptype +else + write(6,'(a)') 'Number of test-materials in the file predict.dat:' + read(5,*) ns + write(6,'(a)') 'Number of features in the file predict.dat:' + read(5,*) nf + write(6,'(a)') 'Highest dimension of the models to be read from SISSO.out:' + read(5,*) ndim + write(6,'(a)') 'Property type (1: continuous, 2: categorical): ' + read(5,*) ptype +end if + +allocate(desc(ndim)) +allocate(allname(nf+2)) +allocate(afname(nf)) +allocate(mname(ns)) +allocate(y(ns)) +allocate(af(ns,nf)) +allocate(dat_desc(ns,ndim)) +allocate(coeff(ndim)) +allocate(res(ns)) + +! predict.dat read in +open(1,file='predict.dat',status='old') +read(1,'(a)') line +call sepchange(line) +call string_split(line,allname,' ') + +if(ptype==1) then + pname=allname(2) + afname(:nf)=allname(3:) +else + afname(:nf)=allname(2:) +end if + +do i=1,ns + read(1,'(a)') line + call sepchange(line) + call string_split(line,mname(i:i),' ') + j=index(line,trim(mname(i))) + if(ptype==1) then + read(line(j+len_trim(mname(i)):),*),y(i),af(i,:) + else + read(line(j+len_trim(mname(i)):),*),af(i,:) + end if +end do +close(1) + +open(1,file='SISSO.out',status='old') +open(3,file='predict_Y.out',status='replace') +open(4,file='predict_X.out',status='replace') + +nd=0 +do while(.not. 
eof(1)) + read(1,'(a)') line + if(index(line,'D descriptor')/=0) then + i=index(line,'D descriptor') + read(line(:i-1),*) nd + print *,'dimension: ',nd + + do i=1,nd + read(1,'(a)') line + j=index(line,'=') + k=index(line,'feature_ID') + desc(i)=line(j+1:k-1) + desc(i)=trim(adjustl(desc(i))) + print *,'feature: ',trim(adjustl(desc(i))) + end do + + if(ptype==1) then + do while (.not. eof(1)) + read(1,'(a)') line + if(index(line,'model(y=sum')==0) cycle + exit + end do + read(1,'(a)') line + read(line(25:),*) coeff(:nd) + read(1,'(a)') line + read(line(25:),*) intercept + print *,'intercept: ',intercept + print *,'coefficients: ',coeff(:nd) + end if + + write(4,'(a,i5)') 'Descriptor coordinates at dimension: ',nd + do i=1,nd + dat_desc(:,i)=data_of_desc(ns,nf,desc(i),af,afname) + end do + do i=1,ns + write(4,'(e20.10)') dat_desc(i,:nd) + end do + + if(ptype==1) then + write(3,'(a,i5)') 'Predictions (y,pred,y-pred) by the model of dimension: ',nd + do i=1,ns + res(i)=y(i)-(intercept+sum(dat_desc(i,:nd)*coeff(:nd))) + write(3,'(3e20.10)') y(i),(intercept+sum(dat_desc(i,:nd)*coeff(:nd))),res(i) + end do + rmse=sqrt(sum(res**2)/ns) + maxae=maxval(abs(res)) + print *,'RMSE and MaxAE: ',rmse,maxae + write(3,'(a,2f20.10)') 'Prediction RMSE and MaxAE: ',rmse,maxae + elseif(ptype==2) then + write(3,'(a)') 'No prediction data for classification!' + end if + end if + if(nd==ndim) exit +end do + + +close(1) +close(3) +close(4) +write(6,'(a/)') 'See details in the output files predict_Y.out and predict_X.out!' 
+ +deallocate(desc) +deallocate(allname) +deallocate(afname) +deallocate(mname) +deallocate(y) +deallocate(af) +deallocate(dat_desc) +deallocate(coeff) +deallocate(res) + +contains + +function data_of_desc(ns,nf,desc_str,af,afname) +integer i,j,k,n,ns,nf,imax,length,maxlength +character desc_str*500,afname(nf)*50,desc_expr(ns)*500,op(17)*10 +real*8 af(ns,nf),data_of_desc(ns) +logical fexist,isop,isvar + +op=(/'+','-','*','/','exp','exp(-','^-1','^2','^3','sqrt','cbrt','log','abs','scd','^6','sin','cos'/) +nop=17 +n=0 +k=1 +desc_expr='' + +do while(k<=len_trim(desc_str)) + isop=.false. + isvar=.false. + + ! detecting operators at potition k: + maxlength=0 + do i=1,nop ! find the longest operator that appears in the string + if(index(desc_str(k:),trim(adjustl(op(i))))==1) then + length=len_trim(adjustl(op(i))) + isop=.true. + if(length>maxlength) then + maxlength=length + imax=i + end if + end if + end do + if(maxlength>0) then ! including the operator + desc_expr(:)(n+1:n+len_trim(adjustl(op(imax))))=trim(adjustl(op(imax))) + n=n+len_trim(adjustl(op(imax))) ! updated length of the string in desc_expr + k=k+len_trim(adjustl(op(imax))) ! updated start position in desc_str + end if + + ! if not operator, detecting variables at position k: + if(.not. isop) then + maxlength=0 + do i=1,nf ! find the longest variable that appear in the descriptor + if(index(desc_str(k:),trim(adjustl(afname(i))))==1) then + length=len_trim(adjustl(afname(i))) + isvar=.true. + if(length>maxlength) then + maxlength=length + imax=i + end if + end if + end do + if(maxlength>0) then ! recreating the express by replacing the variables with numbers + do j=1,ns + write(desc_expr(j)(n+1:n+22),'(a,f20.10,a)') '(',af(j,imax),')' + end do + n=n+22 ! updated length of the stringn in desc_expr + k=k+len_trim(adjustl(afname(imax))) ! updated start position in desc_str + end if + end if + + ! if not operator and variable + if( (.not. isop) .and. (.not. 
isvar) ) then + desc_expr(:)(n+1:n+1)=desc_str(k:k) + n=n+1 + k=k+1 + end if + +end do + +inquire(file='desc_tmp',exist=fexist) +if(fexist) call system('rm desc_tmp') +do i=1,ns + call nospace(desc_expr(i)) + call system('echo "define abs(i){if(i<0) return (-i);return(i)} define exp(i){return(e(i))} & + define sin(i){return(s(i))} define log(i){return(l(i))} & + define cbrt(i){ if(i<0) return (-e(l(-i)/3)); if(i==0) return 0; return e(l(i)/3) } & + define cos(i){return(c(i))} define scd(i){ return (1.0/3.14159265/(1+i^2))} & + '//trim(adjustl(desc_expr(i)))//' " |bc -l >>desc_tmp') +end do + +open (111,file='desc_tmp',status='old') +do i=1,ns + read(111,*) data_of_desc(i) +end do +close(111) +call system('rm desc_tmp') + +end function + + +subroutine nospace(fname) +character(len=*) fname +character string*500 +integer i,j,k +string='' +k=0 +do j=1,len_trim(fname) +if(fname(j:j)==' ' .or. fname(j:j)=='') cycle +k=k+1 +string(k:k)=fname(j:j) +end do + +i=index(string,'exp(-') +if(i>0) then + fname(:i+3)=string(:i+3) + fname(i+4:i+4)='0' + fname(i+5:)=string(i+4:) +else + fname=string +end if +end subroutine + +subroutine string_split(instr,outstr,sp) +! break a string into sub-strings +! input: instr, input string; sp, separator +! output: outstr, output sub-strings +character(len=*) instr,outstr(:),sp +integer n,i,j +logical isend + +isend=.false. +n=0 +outstr='' + +i=index(instr,sp) +if(i/=0) then + if(i/=1) then + n=n+1 + outstr(n)=instr(1:i-1) + end if + do while ((.not. isend) .and. n1) then + n=n+1 + outstr(n)=instr(i+len(sp):i+len(sp)-1+j-1) + end if + i=i+len(sp)+j-1 + else + isend=.true. + end if + end do +end if + +end subroutine + +subroutine sepchange(line) +character(len=*) line +do while (index(line,char(9))/=0) ! separator TAB to space + line(index(line,char(9)):index(line,char(9)))=' ' +end do +do while (index(line,',')/=0) ! 
separator comma to space + line(index(line,','):index(line,','))=' ' +end do +end subroutine + + +end program diff --git a/utilities/SISSO_predict_para b/utilities/SISSO_predict_para new file mode 100644 index 0000000..cf1406b --- /dev/null +++ b/utilities/SISSO_predict_para @@ -0,0 +1,4 @@ +xxx ! Number of materials in the file predict.dat (same format with train.dat) +xxx ! Number of features in the file predict.dat +xxx ! Highest dimension of the models to be read from SISSO.out +xxx ! Property type 1:continuous or 2:categorical diff --git a/utilities/SVC.py b/utilities/SVC.py new file mode 100644 index 0000000..28a8f82 --- /dev/null +++ b/utilities/SVC.py @@ -0,0 +1,108 @@ +# find a SVM line for classification based on the descriptor from SISSO +# read train.dat + +import numpy as np +import time +from sklearn.model_selection import GridSearchCV +from sklearn.svm import LinearSVC +from sklearn.svm import SVC +from sklearn.model_selection import LeaveOneOut + +start=time.time() + +# training data +inp=open('train.dat','r').readlines() +X=[] +y_train=np.array([]) +for i in range(1,len(inp)): # from the second line to the last line + line=str(inp[i]).split() + feat=[] + for j in range(2,len(line)): # line[0]: name; line[1]: property; line[2] first feature + feat.append(float(line[j])) + X.append(feat) + y_train=np.append(y_train,float(line[1])) +X_train=np.array(X) + +# test data +#inp=open('predict.dat','r').readlines() +#X=[] +#y_predict=np.array([]) +#for i in range(1,len(inp)): +# line=str(inp[i]).split() +# feat=[] +# for j in range(2,len(line)): +# feat.append(float(line[j])) +# X.append(feat) +# y_predict=np.append(y_predict,float(line[1])) +#X_predict=np.array(X) + +# rbf kernel +#clf = GridSearchCV(SVC(kernel='rbf',tol=1e-4,max_iter=-1), +# cv=10, n_jobs=-1, verbose=1, +# param_grid={"C": np.logspace(0, 6, 30, base=10), +# "gamma": np.logspace(-6, 2, 30, base=10)}, +# return_train_score=True,refit=True) +# Linear kernel +clf = 
GridSearchCV(SVC(kernel='linear',tol=1e-4,max_iter=-1), + cv=10, n_jobs=-1, verbose=1, + param_grid={"C": np.logspace(1, 5, 10, base=10)}, + return_train_score=True,refit=True) + +print("training:") +clf.fit(X_train, y_train) +print("params: ") +print(clf.cv_results_['params']) +print("mean_test_score: ") +print(clf.cv_results_['mean_test_score']) +print("mean_train_score: ") +print(clf.cv_results_['mean_train_score']) +print("best CV score: ") +print(clf.best_score_) +print("best_params: ") +print(clf.best_params_) +print("best_estimator: ") +print(clf.best_estimator_) +print("coef: (sum(c_i*x_i)+c_0=0) ") +print(clf.best_estimator_.coef_) +print("intercept: ") +print(clf.best_estimator_.intercept_) +print("support: ") +print(clf.best_estimator_.support_) +print(clf.best_estimator_.support_vectors_) + +def train_output(X_train,y_train): + y_pred=clf.predict(X_train) + er=0 + fail_index=[] + for i in range(len(y_train)): + print('Y_ture, Y_pred, data_point: ',np.around(y_train[i],decimals=3),np.around(y_pred[i],decimals=3),i) + if np.sign(y_train[i]) != np.sign(y_pred[i]): + er+=1 + fail_index.append(i) + print('Wrong prediction: Data point ',fail_index) + print("Total number of misclassified data: ",er) +train_output(X_train,y_train) +print("training_score (accuracy): ",clf.best_estimator_.score(X_train,y_train)) + + +#def validate_output(X_predict,y_predict): +# y_pred=clf.predict(X_predict) +# er=0 +# fail_index=[] +# for i in range(len(y_predict)): +# print('Y_ture, Y_pred, data_point: ',np.around(y_predict[i],decimals=3),np.around(y_pred[i],decimals=3),i) +# if np.sign(y_predict[i]) != np.sign(y_pred[i]): +# er+=1 +# fail_index.append(i) +# print('Wrong prediction: Data point ',fail_index) +# print("Total number of misclassified data: ",er) + +#print("validation: ") +#validate_output(X_predict,y_predict) +#print("validation_score (accuracy): ",clf.best_estimator_.score(X_predict,y_predict)) + + + +end=time.time() +print("wall-clock time (seconds):",end-start) 
+# diff --git a/utilities/VarSelect_SISSO.py b/utilities/VarSelect_SISSO.py new file mode 100644 index 0000000..3891489 --- /dev/null +++ b/utilities/VarSelect_SISSO.py @@ -0,0 +1,545 @@ +# Created by Zhen Guo and Runhai Ouyang, 2022.2 +# Variable Selection for SISSO (VS-SISSO), J. Chem. Theory Comput. 18, 4945 (2022) +# Usage: prepare the SISSO.in and train.dat in working directory, and then run the program +# by using python3 or new versions: python3 VarSelect_SISSO.py + + +############################################ User Input ##################################### +# The values below are good for usual computing power. Please make changes for your jobs +# ------------------------------------------------------------------------------------------- +n_init = 10 # Initial size of the S in [J. Chem. Theory Comput. 18, 4945 (2022)] +n_RS = 4 # The size of the Sa +n_max =23 # Maximal size of the S (S=Sa+Sb) +nstep_max =100 # Maximal iterations +nstep_converge = 20 # Converged if the best model unchanged after given number of steps. 
+restart = 0 # 0: start from scratch, 1: continue the unfinished job +runSISSO = 'mpirun -np 64 SISSO.3.3 > SISSO.log' # The command to run SISSO +############################################################################################# + + + +import os +import copy +import time +import random +import math + + +def SISSO_out_reader(SISSO_out_folder, dimension, all_features, maths_operators): + # From SISSO.out, read the errors and involved primary features in all the models + + SISSO_out_file = open('%s/SISSO.out' % SISSO_out_folder, 'r').readlines() + for i in range(len(SISSO_out_file)): + SISSO_out_file[i] = SISSO_out_file[i].strip() + feature_list = [] + descriptor_dict = {} + descriptor_dim = 0 + for i in range(len(SISSO_out_file)): + if SISSO_out_file[i].count('D descriptor'): + descriptor_dim += 1 + descriptor_list = [] + + for d in range(descriptor_dim): + descriptor = SISSO_out_file[i + d + 1] + + for st in range(len(descriptor)): + if descriptor[st] == '=': + descriptor = descriptor[st + 1:].split()[0] + break + for feature in descriptor_2_features(descriptor, all_features, maths_operators): + feature_list.append(feature) + feature_list = list(set(feature_list)) + descriptor_list.append(descriptor) + descriptor_dict.update({descriptor_dim: feature_list}) + total_RMSE_dict = {} + RMSE_dim = 0 + for i in range(len(SISSO_out_file)): + if SISSO_out_file[i].count('RMSE and MaxAE of the model:'): + RMSE_dim += 1 + RMSE_str = SISSO_out_file[i].replace('RMSE and MaxAE of the model:', '').strip() + RMSE = '' + for j in list(RMSE_str): + if j != ' ': + RMSE += j + else: + break + total_RMSE_dict.update({RMSE_dim: float(RMSE)}) + return feature_list, total_RMSE_dict[dimension] + +def descriptor_2_features(descriptor, all_features,maths_operators): +# Identify the primary features in a descriptor formula + import copy + + brace = [] + brace_position = [] + for i in range(len(descriptor)): + if descriptor[i] == '(': + brace.append(0) + brace_position.append(i) + if 
descriptor[i] == ")": + brace.append(1) + brace_position.append(i) + + features = [] + + while brace: + for i in range(len(brace)): + if (brace[i] == 0) and (brace[i + 1] == 1): + features.append(descriptor[brace_position[i] + 1:brace_position[i + 1]]) + # if features[-1].startswith('('): + # del features[-1] + + del brace[i:i + 2] + del brace_position[i:i + 2] + break + + features_new = [] + for feature in features: + features_new.append(feature) + + for Feature in features: + maths_operator_position = [] + maths_operator_length = [] + for i in range(len(Feature)): + for operator in maths_operators: + op_len = len(operator) + if Feature[i:i + op_len] == operator: + maths_operator_position.append(i) + maths_operator_length.append(op_len) + break + Feature_cp = copy.copy(Feature) + count = 0 + count_max = len(copy.copy(maths_operator_position)) + while count < count_max: + + for j in range(len(maths_operator_position)): + features_new.append(Feature_cp[:maths_operator_position[j]]) + features_new.append(Feature_cp[maths_operator_position[j] + maths_operator_length[j]:]) + + maths_operator_length_0 = maths_operator_length[:1][0] + maths_operator_position[:1][0] + Feature_cp = Feature_cp[maths_operator_length_0:] + del maths_operator_length[:1] + del maths_operator_position[:1] + for j in range(len(maths_operator_position)): + maths_operator_position[j] = maths_operator_position[j] - maths_operator_length_0 + + count += 1 + + features_out = [] + for i in features_new: + if (i not in features_out) & (i in all_features): + features_out.append(i) + + return features_out + +def initial_SISSO_in_2_output_parameter(initial_file_dir, all_features_list, output_parameter): + # Read information from SISSO.in + + parameter_startwith_dict = {1: "funit", 2: "ops", 3: 'desc_dim'} + + SISSO_in_file = open('%s/SISSO.in' % initial_file_dir, 'r').readlines() + for i in range(len(SISSO_in_file)): + SISSO_in_file[i] = SISSO_in_file[i].strip() + output_para_in_file = '' + for i in 
range(len(SISSO_in_file)): + if SISSO_in_file[i].startswith(parameter_startwith_dict[output_parameter]): + output_para_in_file = SISSO_in_file[i] + # output_para_in_file = output_para_in_file.replace(parameter_startwith_dict[output_parameter], '') + output_para_in_file = output_para_in_file.split('=')[1] + if output_parameter == 1: # funit + funit = output_para_in_file + funit = funit.replace('(', "") + for alp in range(len(funit)): + if funit[alp] == '!': + funit = funit[:alp].strip() + break + if funit == ')': + return [[]] + else: + funit_list = [] + operator = '' + for i in funit: + if i == '!': + break + if (i != ':' and i != ")"): + operator += i + else: + funit_list.append(int(operator)) + operator = '' + + if (funit_list[-1] > len(all_features_list)): + exit('Error: funit out of rangeļ¼\n' + 'Check the parameter \'funit\' and \'nsf\' in SISSO.in') + if len(funit_list) % 2 != 0: + exit('Error: wrong \'funit\' setting! \n' + 'Check the parameter \'funit\' in SISSO.in') + feature_class = [] + list_new = [] + for i in range(len(funit_list)): + if i % 2 == 1: + if i != len(funit_list) - 1: + list_new += all_features_list[funit_list[i]:funit_list[i + 1] - 1] + else: + list_new += all_features_list[funit_list[i]:] + continue + feature_class.append(all_features_list[funit_list[i] - 1:funit_list[i + 1]]) + # if list_new != []: + # feature_class.append(list_new) + return feature_class + + + elif output_parameter == 2: # ops + operators_list = [] + operators = output_para_in_file.split('!')[0].strip() + operators = operators.replace("','", "") + operators = operators.replace(")(", " ") + operators = operators.replace("'(", "").replace(")'", "") + operators_list = list(set(operators.split())) + return operators_list + elif output_parameter == 3: + desc_dim = output_para_in_file + desc_dim = desc_dim.replace('desc_dim=', '') + desc_dim = desc_dim[:3] + desc_dim = desc_dim.strip() + return int(desc_dim) + + +def initial_train_dat_2_output_parameter(train_dat_folder, 
output_parameter): + # Read data from train.dat + + train_dat_lines = open('%s/train.dat' % train_dat_folder).readlines() + for line in range(len(train_dat_lines)): + train_dat_lines[line] = train_dat_lines[line].replace(',',' ').replace('\t',' ') + train_dat_lines[line] = train_dat_lines[line].strip() + if not train_dat_lines[line]: + train_dat_lines.remove('') + for line in range(len(train_dat_lines)): + train_dat_lines[line] = train_dat_lines[line].split() + features_name_list = train_dat_lines[0][2:] + materials_name = train_dat_lines[0][0] + property_name = train_dat_lines[0][1] + train_dat = {} + for line in range(len(train_dat_lines)): + if line == 0: + for num in range(len(train_dat_lines[line])): + train_dat.update({train_dat_lines[line][num]: []}) + else: + for num in range(len(train_dat_lines[line])): + list_temp = train_dat[train_dat_lines[0][num]] + list_temp.append(train_dat_lines[line][num]) + train_dat.update({train_dat_lines[0][num]: list_temp}) + + if output_parameter == 1: + return materials_name + elif output_parameter == 2: + return property_name + elif output_parameter == 3: + return list(features_name_list) + elif output_parameter == 4: + return train_dat + + +def build_SISSO_in(initial_SISSO_in_folder, new_SISSO_in_folder, new_features_class, features_list): + # Update SISSO.in for new iteration + import os + # new_features_class = [ ["f1","f2","f3"], ["f4","f5"],["f6"] ] + number_feature = len(features_list) + if new_features_class == []: + f_unit = 'f_unit=()\n' + else: + n_group = [] + for i in range(len(new_features_class)): + n_group.append(len(new_features_class[i])) + + f_unit_list = [1] + f_unit = 'funit=(1:' + for i in range(len(n_group)): + if i == 0: + f_unit_list.append(f_unit_list[0] + n_group[i] - 1) + f_unit += (str(f_unit_list[-1]) + ')') + else: + f_unit_list.append(f_unit_list[-1] + 1) + f_unit += ('(' + str(f_unit_list[-1]) + ':') + f_unit_list.append(n_group[i] - 1 + f_unit_list[-1]) + f_unit += (str(f_unit_list[-1]) + 
')') + f_unit += '\n' + nsf = 'nsf=%s\n' % number_feature + SISSO_in = open('%s/SISSO.in' % initial_SISSO_in_folder, 'r').readlines() + for i in range(len(SISSO_in)): + # SISSO_in[i] = SISSO_in[i].lstrip() + if SISSO_in[i].startswith('funit'): + SISSO_in[i] = f_unit + if SISSO_in[i].startswith('nsf'): + SISSO_in[i] = nsf + + if os.path.exists(new_SISSO_in_folder): + open('%s/SISSO.in' % new_SISSO_in_folder, 'w').writelines(SISSO_in) + else: + os.mkdir(new_SISSO_in_folder) + open('%s/SISSO.in' % new_SISSO_in_folder, 'w').writelines(SISSO_in) + + +def features_classification(features_list, all_features_class): + # Group the primary features for creating new train.dat according to their dimensions/units + features_class = [] + for i in all_features_class: + list_new = list(set(i).intersection(features_list)) + features_class.append(list_new) + + features_class_new = [] + for i in features_class: + if i: + features_class_new.append(i) + + return features_class_new + + +def build_train_dat(new_train_dat_folder, new_features_class, initial_train_dat, compounds_column_name, + property_column_name, features_list): + # Creat train.dat for new iterations. 
+ import copy + dimensionless_features = copy.copy(features_list) + if new_features_class: + for i in new_features_class: + for j in i: + dimensionless_features.remove(j) + new_train_dat_lines = [] + sample_num = len(initial_train_dat[property_column_name]) + for tmp_0 in range(sample_num): + if tmp_0 == 0: + tmp_line = '' + for tmp in (compounds_column_name, property_column_name): + tmp_line += '%s ' % tmp + for tmp_1 in new_features_class + [dimensionless_features]: + for tmp_2 in tmp_1: + if tmp_1: + tmp_line += '%s ' % tmp_2 + new_train_dat_lines.append(tmp_line + '\n') + tmp_line = '' + for tmp in (initial_train_dat[compounds_column_name][tmp_0], initial_train_dat[property_column_name][tmp_0]): + tmp_line += '%s ' % tmp + for tmp_1 in new_features_class + [dimensionless_features]: + for tmp_2 in tmp_1: + if tmp_2: + tmp_line += '%s ' % initial_train_dat[tmp_2][tmp_0] + new_train_dat_lines.append(tmp_line + '\n') + open('%s/train.dat' % new_train_dat_folder, 'w').writelines(new_train_dat_lines) + + +def check_done(task_folder): + # Check if the SISSO job was done successfully. 
+ + import os, time + file_list = [] + for root, folders, files in os.walk(task_folder): + for j in files: + file_list.append(j) + + if 'SISSO.out' in file_list: + SISSO_out_read = open('%s/SISSO.out' % task_folder, 'r').readlines() + if len(SISSO_out_read) != 0: + if SISSO_out_read[-4].startswith('Total time (second):'): + os.system('rm -rf %s/feature_space' % task_folder) + return 1 + else: + return 0 + else: + return 0 + else: + return 0 + + +def random_features_list(all_features, selected_features, alpha_dict, n_init): + # Update of the primary features for new train.dat + + unselected_features_list = list(set(all_features) - set(selected_features)) + rand_list = [] + if 1 in alpha_dict.values(): + for i in unselected_features_list: + rand_list.append([i, random.random() * (alpha_dict[i])]) + if 1 not in alpha_dict.values(): + for i in unselected_features_list: + rand_list.append([i, random.random()]) + # bubble sort + for i in range(len(rand_list)): + for j in range(len(rand_list) - i - 1): + if rand_list[j][1] < rand_list[j + 1][1]: + rand_list[j], rand_list[j + 1] = rand_list[j + 1], rand_list[j] + feature_new = [] + for i in rand_list[: n_init - len(selected_features)]: + feature_new.append(i[0]) + + return selected_features + feature_new + + +def update_alpha_list(alpha_dict, selected_features, features_list, alpha): + # Update the penalty factor + + # features_list_dropped = list(set(features_list) - set(selected_features)) + for i in features_list: + alpha_old = alpha_dict[i] + alpha_dict.update({i: alpha_old * alpha}) + + return alpha_dict + + +def read_feature_list_from_train_data(task_folder): + train_dat = open('%s/train.dat' % task_folder, 'r').readlines() + columns = train_dat[0].split() + return columns[2:], len(columns[2:]) + + +def check_last_step(): + os.system('ls -F | grep \'/$\' > .temp_file') + time.sleep(5) + dir_list = open('./.temp_file', 'r').readlines() + for i in range(len(dir_list)): + dir_list[i] = dir_list[i].strip()[:-1] + max_num 
= -1 + for i in dir_list: + if i.isnumeric(): + if max_num < int(i): + max_num = int(i) + + IF_continue_step_done = 0 + if max_num != -1: + file_list = os.listdir('./%s' % str(max_num)) + if 'SISSO.out' in file_list: + SISSO_out_read = open('%s/SISSO.out' % str(max_num), 'r').readlines() + if len(SISSO_out_read) != 0: + if SISSO_out_read[-4].startswith('Total time (second):'): + IF_continue_step_done = 1 + os.system('rm -f .temp_file') + return max_num, IF_continue_step_done + + +# ----------------------------- +time_start = time.time() +initial_file_folder = './' +compounds_column_name = initial_train_dat_2_output_parameter(initial_file_folder, 1) +property_column_name = initial_train_dat_2_output_parameter(initial_file_folder, 2) +all_features = initial_train_dat_2_output_parameter(initial_file_folder, 3) +train_dat = initial_train_dat_2_output_parameter(initial_file_folder, 4) + +initial_maths_operators = initial_SISSO_in_2_output_parameter(initial_file_folder, all_features, 2) +all_features_class = initial_SISSO_in_2_output_parameter(initial_file_folder, all_features, 1) +desc_dim = initial_SISSO_in_2_output_parameter(initial_file_folder, all_features, 3) +selected_features = [] +selected_features_list = [] + +features_list = [] +# train_features=[] +features_list_list = [] +RMSE_list = [] +min_RMSE_list = [] +VS_results = open('./VS_results', 'a') +VS_results.write( + "Iter. \t %of_visited_var.\tRMSE_of_this_step\tLowest_RMSE\t[var. in the lowest-RMSE-model]\t[var. 
deselected at this step]\t\n") +VS_results.close() +alpha_dict = {} +for i in all_features: + alpha_dict.update({i: 1}) +min_RMSE_step = 0 +visited_set = set() +alpha = 0 + +continue_step, IF_continue_step_done = check_last_step() +if continue_step >= 0: + # os.system('cp VS_results VS_results_old') + if IF_continue_step_done == 0: + os.system('rm -rf %s' % str(continue_step)) + +for i in range(nstep_max): + if restart : + + if i < continue_step + IF_continue_step_done: + features_list, n_init = read_feature_list_from_train_data('./%s' % str(i)) + selected_features, RMSE = SISSO_out_reader('./%s' % str(i), desc_dim, all_features, initial_maths_operators) + alpha_dict = update_alpha_list(alpha_dict, selected_features, features_list, alpha) + # train_features = initial_train_dat_2_output_parameter('./%s' % str(i), 3) + features_list_list.append(features_list) + RMSE_list.append(RMSE) + selected_features_list.append(selected_features) + visited_set.update(features_list) + completed_percent = float(len(visited_set)) / float(len(all_features)) + if RMSE < RMSE_list[min_RMSE_step]: + min_RMSE_step = i + elif RMSE == RMSE_list[min_RMSE_step] and len(selected_features) <= len(selected_features_list[min_RMSE_step]): + min_RMSE_step = i + else: + selected_features, RMSE = SISSO_out_reader('./%s' % str(min_RMSE_step), desc_dim, all_features, + initial_maths_operators) + + min_RMSE_list.append(RMSE_list[min_RMSE_step]) + continue + + VS_results = open('./VS_results', 'a') + VS_log = open("./VS_log", 'a') + new_folder = './%s' % str(i) + try: + os.mkdir(new_folder) + except: + print('folder %s/ already exist!\n' % str(i)) + + VS_log.write('==========' * 10) + VS_log.write('\nIteration\t%s\n' % str(i)) + alpha_dict = update_alpha_list(alpha_dict, selected_features, features_list, alpha) + features_list = random_features_list(all_features, selected_features, alpha_dict, n_init) + new_features_class = features_classification(features_list, all_features_class) + 
build_SISSO_in(initial_file_folder, new_folder, new_features_class, features_list) + build_train_dat(new_folder, new_features_class, train_dat, compounds_column_name, property_column_name,features_list) + os.chdir(new_folder) + os.system('%s' % runSISSO) + os.chdir('../') + + # time.sleep(5) + # while True: + # check_num = check_done(new_folder) + # time.sleep(5) + # if check_num == 1: + # break + + selected_features, RMSE = SISSO_out_reader('./%s' % str(i), desc_dim, all_features, initial_maths_operators) + features_list_list.append(features_list) + RMSE_list.append(RMSE) + selected_features_list.append(selected_features) + features_list = initial_train_dat_2_output_parameter('./%s' % str(i), 3) + visited_set.update(features_list) + completed_percent = float(len(visited_set)) / float(len(all_features)) + # if len(visited_set) == len(all_features): + # alpha = 1 + # for f in all_features: + # alpha_dict.update({f: 1}) + + if RMSE < RMSE_list[min_RMSE_step]: + min_RMSE_step = i + elif RMSE == RMSE_list[min_RMSE_step] and len(selected_features) <= len(selected_features_list[min_RMSE_step]): + min_RMSE_step = i + else: + selected_features, RMSE = SISSO_out_reader('./%s' % str(min_RMSE_step), desc_dim, all_features, + initial_maths_operators) + VS_results.write(' %s\t%s\t%.06f\t%s\t%s\t%s\t\n' % ( + str(i), format(completed_percent, '.1%'), RMSE_list[-1], RMSE_list[min_RMSE_step], + selected_features, list(set(features_list).difference(set(selected_features))))) + + n_init = len(selected_features) + n_RS + if n_init > n_max: + VS_results.write('Warning: The subset size hits maximum, the n_RS for the next step is reduced to %s\n' % str( + n_RS -(n_init - n_max))) + n_init = n_max + + VS_log.write("Unvisited variables (labeled as 1) : \n%s\n\n" % alpha_dict) + #VS_log.write('Number of variables in the lowest-RMSE-model is %s, at iteration %s, with the RMSE %06f\n' % ( + # str(len(selected_features)), str(min_RMSE_step), RMSE_list[min_RMSE_step])) + #VS_log.write('Number 
of variables for the next SISSO is %s \n' % str( n_init)) + VS_log.close() + min_RMSE_list.append(RMSE_list[min_RMSE_step]) + if len(RMSE_list) >= nstep_converge: + if len(list(set(min_RMSE_list[-nstep_converge:]))) == 1: + VS_results.write('Stop! \n') + VS_results.close() + break +time_end = time.time() +VS_results = open('./VS_results', 'a') +VS_results.write('%s\t%s\n' % ('Total time (second):', str(round(time_end - time_start, 2)))) +VS_results.close() diff --git a/utilities/af2traindat.f90 b/utilities/af2traindat.f90 new file mode 100644 index 0000000..3fd8435 --- /dev/null +++ b/utilities/af2traindat.f90 @@ -0,0 +1,131 @@ +program af2traindat +! Creating the train.dat based on the files atom_features (list of atomic features) and +! samplelist (list of the training samples). + +real*8,allocatable:: feat(:,:) +character line*1000,atomline*1000,strings(500)*50,letter(14)*2 +character,allocatable:: featname(:,:)*50,element(:)*10 +integer i,j,k,k1,k2,k3,k4,k5,k6,natom,naf,nele,npair,length + +! mark for the constituent elements of a compound +letter=(/'_A','_B','_C','_D','_E','_F','_G','_H','_I','_J','_K','_L','_M','_N'/) + +! files atom_features and samplelist must be ready +write(*,'(a)') 'Number of atomic features to input from the file "atom_features"' +read(*,*) naf +write(*,'(a)'),'Number of elements in each sample in the file "samplelist" ' +read(*,*),nele + +allocate(feat(nele,naf)) +allocate(featname(nele,naf)) +allocate(element(nele)) + +open(1,file='train.dat',status='replace') +open(2,file='samplelist',status='old') +open(3,file='atom_features',status='old') + +! read the title line from atom_features for the feature names +read(3,'(a)'),line +call string_split(line,strings,' ') +do i=1,naf +do j=1,nele +featname(j,i)=trim(adjustl(strings(i+1)))//letter(j) +end do +end do +rewind(3) + +! get the element-name length +read(2,*) ! skip the title line +read(2,'(a)'),line +line=adjustl(line) +length=(index(line,' ')-1)/nele +rewind(2) +! 
write the title line to the train.dat +write(1,'(a,a15)'),'compound',(((trim(adjustl(featname(k,i)))),k=1,nele),i=1,naf) + +! creating the file train.dat +read(2,*) ! skip the title line +do while(.not. eof(2)) +read(2,'(a)'),line + ! get the compound name + call string_split(line,strings,' ') + strings(1)=trim(adjustl(strings(1))) + do i=1,nele + element(i)=strings(1)((i-1)*length+1:i*length) + end do + + ! get the feature data from atom_features + do i=1,nele + rewind(3) + 100 read(3,'(a)',end=101),atomline + k=index(atomline,trim(adjustl(element(i)))) + if(k/=0) then + read(atomline(k+length:),*),feat(i,:naf) + cycle + else + goto 100 + end if + 101 continue + print *,'atom not found: ', trim(adjustl(element(i))) + stop + end do + + + ! write the feature data + write(1,'(X,a,f15.5)'),(trim(adjustl(element(i))),i=1,nele),& + ((feat(j,i),j=1,nele),i=1,naf) +end do + +close(1) +close(2) +close(3) + +deallocate(feat) +deallocate(featname) +deallocate(element) + +contains + + +subroutine string_split(instr,outstr,sp) +! break a string into sub-strings +! input: instr, input string; sp, separator +! output: outstr, output sub-strings +character(len=*) instr,outstr(:),sp +integer n,i,j +logical isend + +isend=.false. +n=0 +outstr='' + +i=index(instr,sp) +if(i/=0) then + if(i/=1) then + n=n+1 + outstr(n)=instr(1:i-1) + end if + do while ((.not. isend) .and. n1) then + n=n+1 + outstr(n)=instr(i+len(sp):i+len(sp)-1+j-1) + end if + i=i+len(sp)+j-1 + else + isend=.true. + end if + end do +end if + +end subroutine + + +end + + diff --git a/utilities/atom_features b/utilities/atom_features new file mode 100644 index 0000000..1f053d5 --- /dev/null +++ b/utilities/atom_features @@ -0,0 +1,103 @@ +atom Z IE XP Rc XAR ! title line, feature names +H_ 1 13.59 2.20 0.31 2.20 ! 
elements and feature data +Li 3 5.391 0.98 1.28 0.97 +Na 11 5.138 0.93 1.66 1.01 +K_ 19 4.340 0.82 2.03 0.91 +Rb 37 4.176 0.82 2.20 0.89 +Cs 55 3.893 0.79 2.44 0.86 +Fr 87 4.073 0.70 2.60 0.86 +Be 4 9.322 1.57 0.96 1.47 +Mg 12 7.645 1.31 1.41 1.23 +Ca 20 6.112 1.00 1.76 1.04 +Sr 38 5.695 0.95 1.95 0.99 +Ba 56 5.212 0.89 2.15 0.97 +Ra 88 5.279 0.97 2.21 0.97 +Sc 21 6.561 1.36 1.70 1.20 +Y_ 39 6.218 1.22 1.90 1.11 +Lu 71 5.425 1.27 1.87 1.14 +Ti 22 6.827 1.54 1.60 1.32 +Zr 40 6.634 1.33 1.75 1.22 +Hf 72 6.824 1.30 1.75 1.23 +V_ 23 6.746 1.63 1.53 1.45 +Nb 41 6.758 1.60 1.64 1.23 +Ta 73 7.887 1.50 1.70 1.33 +Cr 24 6.766 1.66 1.39 1.56 +Mo 42 7.092 2.16 1.54 1.30 +W_ 74 7.980 2.36 1.62 1.40 +Mn 25 7.434 1.55 1.39 1.60 +Tc 43 7.275 1.90 1.47 1.36 +Re 75 7.877 1.90 1.51 1.46 +Fe 26 7.902 1.83 1.32 1.64 +Ru 44 7.361 2.22 1.42 1.42 +Os 76 8.706 2.20 1.44 1.52 +Co 27 7.880 1.88 1.26 1.70 +Rh 45 7.459 2.28 1.42 1.45 +Ir 77 9.121 2.20 1.41 1.55 +Ni 28 7.639 1.91 1.24 1.75 +Pd 46 8.337 2.20 1.39 1.35 +Pt 78 9.017 2.28 1.36 1.44 +Cu 29 7.726 1.90 1.32 1.75 +Ag 47 7.576 1.93 1.45 1.42 +Au 79 9.225 2.54 1.36 1.42 +Zn 30 9.394 1.65 1.22 1.66 +Cd 48 8.994 1.69 1.44 1.46 +Hg 80 10.43 2.00 1.32 1.44 +B_ 5 8.297 2.04 0.84 2.01 +Al 13 5.985 1.61 1.21 1.47 +Ga 31 5.998 1.81 1.22 1.82 +In 49 5.786 1.78 1.42 1.49 +Tl 81 6.108 1.62 1.45 1.44 +C_ 6 11.26 2.55 0.76 2.50 +Si 14 8.151 1.90 1.11 1.74 +Ge 32 7.897 2.01 1.20 2.02 +Sn 50 7.344 1.96 1.39 1.72 +Pb 82 7.416 2.33 1.46 1.55 +N_ 7 14.53 3.04 0.71 3.07 +P_ 15 10.48 2.19 1.07 2.06 +As 33 9.814 2.18 1.19 2.20 +Sb 51 8.643 2.05 1.39 1.82 +Bi 83 7.286 2.02 1.48 1.67 +O_ 8 13.61 3.44 0.66 3.50 +S_ 16 10.36 2.58 1.05 2.44 +Se 34 9.752 2.55 1.20 2.48 +Te 52 9.009 2.10 1.38 2.01 +Po 84 8.416 2.00 1.40 1.76 +F_ 9 17.42 3.98 0.57 4.10 +Cl 17 12.96 3.16 1.02 2.83 +Br 35 11.81 2.96 1.20 2.74 +I_ 53 10.45 2.66 1.39 2.21 +At 85 9.317 2.20 1.50 1.90 +La 57 5.577 1.10 2.07 1.08 +Ce 58 5.538 1.12 2.04 1.08 +Pr 59 5.461 1.13 2.03 1.07 +Nd 60 5.525 1.14 
2.01 1.07 +Pm 61 5.597 1.15 1.99 1.07 +Sm 62 5.643 1.17 1.98 1.07 +Eu 63 5.670 1.18 1.98 1.01 +Gd 64 6.150 1.20 1.96 1.11 +Tb 65 5.864 1.21 1.94 1.10 +Dy 66 5.938 1.22 1.92 1.10 +Ho 67 6.021 1.23 1.92 1.10 +Er 68 6.107 1.24 1.89 1.11 +Tm 69 6.184 1.25 1.90 1.11 +Yb 70 6.253 1.25 1.87 1.06 +Ac 89 5.172 1.10 2.15 1.00 +Th 90 6.083 1.30 2.06 1.11 +Pa 91 5.887 1.50 2.00 1.14 +U_ 92 6.193 1.38 1.96 1.22 +Np 93 6.265 1.36 1.90 1.22 +Pu 94 6.059 1.28 1.87 1.22 +Am 95 5.991 1.30 1.80 1.20 +Cm 96 6.022 1.30 1.69 1.20 +Bk 97 6.198 1.30 1.69 1.20 +Cf 98 6.282 1.30 1.69 1.20 +Es 99 6.368 1.30 1.69 1.20 + + +data source: https://www.webelements.com/ +Z: atomic number +IE: first ionization energy +XP: Pauling electronegativity;Pb2+: 1.87,Pb4+:2.33 +XAR: Allred-Rochow electronegativity +Rc: covalent radius (2008) + diff --git a/utilities/k-fold-cv.f90 b/utilities/k-fold-cv.f90 new file mode 100644 index 0000000..d9879df --- /dev/null +++ b/utilities/k-fold-cv.f90 @@ -0,0 +1,142 @@ +program kfoldcv +! creating the train.dat and SISSO.in for the subsets of samples for k-fold cross validation +! applicable to both classification and regression + +integer i,j,k,l,iii,kfold,ptype,ngroup +integer,allocatable:: nsample(:),msample(:) +character jobname*7,line*100000,nsample_line*100000 +real rand +logical,allocatable:: selected(:,:),selected_this(:,:) + +! User input + parameter(kfold=10) + + +! read SISSO.in for ptype, nsample +open(1,file='SISSO.in',status='old') +do while (.not.
eof(1)) + read(1,'(a)') line + i=index(line,'!') + if(i/=0) line(i:)='' + if(index(line,'ptype')/=0) then + j=index(line,'=') + read(line(j+1:),*) ptype + else if(index(line,'nsample')/=0) then + j=index(line,'=') + read(line(j+1:),'(a)') nsample_line + end if +end do +close(1) + +if(ptype==1) then + allocate(nsample(1)) + read(nsample_line,*) nsample(1) + ngroup=1 +else if(ptype==2) then + i=index(nsample_line,'(') + j=index(nsample_line,')') + l=0 + do k=i,j + if(nsample_line(k:k)==',') l=l+1 + end do + allocate(nsample(l+1)) + read(nsample_line(i+1:j-1),*) nsample(1:l+1) + ngroup=l+1 +end if + +allocate(msample(ngroup)) +allocate(selected(ngroup,maxval(nsample))) +allocate(selected_this(ngroup,maxval(nsample))) + +call random_seed() + +!---------------------- + +selected=.false. + +do i=1,kfold + + selected_this=.false. + do iii=1,ngroup ! randomly selecting training data for this fold + k=0 + msample(iii)=nsample(iii)/kfold + if(i<=mod(nsample(iii),kfold)) msample(iii)=msample(iii)+1 + do while(k(i5,a),i5,a)') & + 'nsample = (',((nsample(k)-msample(k),','),k=1,ngroup-1),nsample(ngroup)-msample(ngroup),')' + end if + write(2,'(a)') trim(line) + end if + end do + + close(1) + close(2) + +end do + + +deallocate(nsample) +deallocate(msample) +deallocate(selected) +deallocate(selected_this) + + +end program + diff --git a/utilities/leave-percent-out-cv.f90 b/utilities/leave-percent-out-cv.f90 new file mode 100644 index 0000000..d71bd85 --- /dev/null +++ b/utilities/leave-percent-out-cv.f90 @@ -0,0 +1,134 @@ +program lpocv +! creating the train.dat and SISSO.in for the subsets of samples for leave-percent-out cross validation +! applicable to both classification and regression + +integer i,j,k,l,iii,niter,ptype,ngroup +integer,allocatable:: nsample(:),msample(:) +character jobname*7,line*100000,nsample_line*100000 +real rand,percent +logical,allocatable:: selected(:,:) + +! USER input +parameter(niter=50,percent=0.1) + +! 
read SISSO.in for ptype, nsample +open(1,file='SISSO.in',status='old') +do while (.not. eof(1)) + read(1,'(a)') line + i=index(line,'!') + if(i/=0) line(i:)='' + if(index(line,'ptype')/=0) then + j=index(line,'=') + read(line(j+1:),*) ptype + else if(index(line,'nsample')/=0) then + j=index(line,'=') + read(line(j+1:),'(a)') nsample_line + end if +end do +close(1) + +if(ptype==1) then + allocate(nsample(1)) + read(nsample_line,*) nsample(1) + ngroup=1 +else if(ptype==2) then + i=index(nsample_line,'(') + j=index(nsample_line,')') + l=0 + do k=i,j + if(nsample_line(k:k)==',') l=l+1 + end do + allocate(nsample(l+1)) + read(nsample_line(i+1:j-1),*) nsample(1:l+1) + ngroup=l+1 +end if + +allocate(msample(ngroup)) +allocate(selected(ngroup,maxval(nsample))) + + +call random_seed() + +!---------------------- + +do i=1,niter + + selected=.false. + + do iii=1,ngroup ! randomly selecting training data for this fold + k=0 + msample(iii)=nint(nsample(iii)*percent) + do while(k(i5,a),i5,a)') & + 'nsample = (',((nsample(k)-msample(k),','),k=1,ngroup-1),nsample(ngroup)-msample(ngroup),')' + end if + write(2,'(a)') trim(line) + end if + end do + + close(1) + close(2) + + +end do + +deallocate(nsample) +deallocate(selected) + +end program + diff --git a/utilities/samplelist b/utilities/samplelist new file mode 100644 index 0000000..641a31c --- /dev/null +++ b/utilities/samplelist @@ -0,0 +1,26 @@ +compound ! (title line, any string) + LaBi ! (compounds whose elements must appear in the file atom_features) + HfC_ ! (same string length for all the compounds) + ZrC_ + LaSb + Y_Sb + ThC_ + Y_Se + CeAs + ErAs + GdAs + TbAs + YbAs + CeBi + DyBi + ErBi + GdBi + PrBi + CrC_ + FeC_ + ScC_ + TaC_ + V_C_ + CeN_ + CeP_ + HoH_