
Commit

Merged upstream/main from Alaukiksaxena/CompositionSpace (i.e. the original repo from which eisenforschung forked off its CompositionSpace) into eisenforschung's CompositionSpace/main
atomprobe-tc committed Jul 3, 2024
2 parents d7abfe9 + 44a81ff commit b3ed0d7
Showing 5 changed files with 422 additions and 37 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -12,3 +12,8 @@ traj.*
*.sub*
*.dat
*log*
*.h5
*.vtu
*.png
./output/
./tests/data/
3 changes: 2 additions & 1 deletion compositionspace/datautils.py
@@ -230,7 +230,7 @@ def get_apt_dataframe(self):

if filename.endswith(".RRNG"):
path = os.path.join(self.params["input_path"], filename)
ions,rrngs = self.get_rrng(rrange_file)
ions,rrngs = self.get_rrng(path)

return (df_Mass_POS_lst, file_name_lst, ions, rrngs)

@@ -278,6 +278,7 @@ def get_big_slices(self):
group1 = hdf.create_group("group_xyz_Da_spec")
group1.attrs["columns"] = ["x","y","z","Da","spec"]
group1.attrs["spec_name_order"] = list(c)
self.chemical_species = list(c) # Added A.S. 2024.06.25
sublength_x= abs((max(sorted_df['z'])-min(sorted_df['z']))/self.params["n_big_slices"])

start = min(sorted_df['z'])
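The spec_name_order attribute written here is what auto_phase_assign (below) reads back to label the composition columns. A minimal sketch of inspecting it, assuming a slices file produced by this pipeline (the path output/slices.h5 is only an example):

import h5py

with h5py.File("output/slices.h5", "r") as hdfr:  # hypothetical path to the file written by get_big_slices
    group1 = hdfr["group_xyz_Da_spec"]
    columns = list(group1.attrs["columns"])          # ["x", "y", "z", "Da", "spec"]
    species = list(group1.attrs["spec_name_order"])  # chemical species labels
print(columns, species)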
93 changes: 93 additions & 0 deletions compositionspace/segmentation.py
@@ -13,6 +13,8 @@
from pyevtk.hl import gridToVTK#, pointsToVTKAsTIN
import yaml
import pyvista as pv
from sklearn.mixture import GaussianMixture
from sklearn.ensemble import RandomForestClassifier

class CompositionClustering():

@@ -273,4 +275,95 @@ def plot3d(self, **kwargs):
grid.plot(**kwargs, jupyter_backend="panel")


def plot_relative_importance(self, feature_importances, feature_names, sorted_idx):
# Plotting the feature importances
# Create the vertical bar graph
plt.figure(figsize=(10, 12))
plt.title("Feature Importances")
plt.bar(range(len(sorted_idx)), feature_importances[sorted_idx], align="center")
plt.xticks(range(len(sorted_idx)), [feature_names[i] for i in sorted_idx], rotation=45)
plt.ylabel("Relative Importance")
plt.xlabel("Features")
plt.show()

def auto_phase_assign(self, Slices_file, Vox_ratios_file,
initial_guess_phases, plot=False,
print_importance=False,
modified_comp_analysis=None,
n_trunc_spec=None):


with h5py.File(Slices_file, "r") as hdfr:
group1 = hdfr.get("group_xyz_Da_spec")
Chem_list = list(group1.attrs["spec_name_order"])  # chemical species labels written by get_big_slices


with h5py.File(Vox_ratios_file, "r") as hdfr:
Ratios = np.array(hdfr.get("vox_ratios"))
Ratios_columns = list(list(hdfr.attrs.values())[0])  # column labels stored as a file-level attribute


Ratios = pd.DataFrame(data=Ratios, columns=Ratios_columns)

X = Ratios.drop(['Total_no','vox'], axis=1)

gm = GaussianMixture(n_components=initial_guess_phases, max_iter=100000,verbose=0)
gm.fit(X)
y_pred=gm.predict(X)
Ratios = pd.DataFrame(data=X.values, columns=Chem_list)


# Use the GMM cluster labels as targets for a Random Forest to rank feature importance
X_ = X.values
y = y_pred
# Initialize the Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model to the data
rf.fit(X_, y)

# Get the feature importances
feature_importances = rf.feature_importances_

# Indices of features sorted by importance, most important first
sorted_idx = feature_importances.argsort()[::-1]



# Print sorted feature importances along with their corresponding feature numbers
feature_names = Chem_list


if plot:
self.plot_relative_importance(feature_importances, feature_names, sorted_idx)

if print_importance:
for index in sorted_idx:
print(f" {feature_names[index]} - Importance: {feature_importances[index]}")

# BIC analysis on modified compositions
if modified_comp_analysis:

# Keep only the n_trunc_spec most important species (all species if n_trunc_spec is None)
X_modified = X.values[:, sorted_idx][:, 0:n_trunc_spec]
gm_scores = []
aics = []
bics = []

n_clusters=list(range(1,11))
for n_cluster in tqdm(n_clusters):
gm = GaussianMixture(n_components=n_cluster,verbose=0)
gm.fit(X_modified)
y_pred=gm.predict(X_modified)
#gm_scores.append(homogeneity_score(y,y_pred))
aics.append(gm.aic(X_modified))
bics.append(gm.bic(X_modified))

plt.plot(n_clusters, bics, "-o", label="BIC")
plt.legend()
plt.show()

return sorted_idx
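A minimal usage sketch of the new method (not part of this commit); the argument-free constructor and the file paths are assumptions about the surrounding pipeline:

from compositionspace.segmentation import CompositionClustering

seg = CompositionClustering()  # assumed constructor; the real class may require a params/config argument
sorted_idx = seg.auto_phase_assign(
    Slices_file="output/slices.h5",          # hypothetical HDF5 file from get_big_slices
    Vox_ratios_file="output/vox_ratios.h5",  # hypothetical HDF5 file with per-voxel composition ratios
    initial_guess_phases=2,                  # initial number of GMM components
    plot=True,                               # bar chart of Random Forest feature importances
    print_importance=True,                   # print species ranked by importance
    modified_comp_analysis=True,             # re-run GMM on the top n_trunc_spec species and record BIC
    n_trunc_spec=2,
)
# sorted_idx ranks the chemical species, most discriminative first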


