|
13 | 13 | from pyevtk.hl import gridToVTK#, pointsToVTKAsTIN
|
14 | 14 | import yaml
|
15 | 15 | import pyvista as pv
|
| 16 | +from sklearn.mixture import GaussianMixture |
| 17 | +from sklearn.ensemble import RandomForestClassifier |
16 | 18 |
|
17 | 19 | class CompositionClustering():
|
18 | 20 |
|
@@ -273,4 +275,95 @@ def plot3d(self, **kwargs):
|
273 | 275 | grid.plot(**kwargs, jupyter_backend="panel")
|
274 | 276 |
|
275 | 277 |
|
def plot_relative_importance(self, feature_importances, feature_names, sorted_idx):
    """Show a vertical bar chart of feature importances.

    Bars are drawn in the order given by ``sorted_idx``; each bar is
    labelled with the matching entry of ``feature_names``.

    Parameters
    ----------
    feature_importances
        Importance values, indexed with ``sorted_idx`` as a whole
        (presumably a NumPy array -- confirm against caller).
    feature_names
        Sequence of labels, indexed element-wise by ``sorted_idx``.
    sorted_idx
        Feature indices in the order the bars should appear.
    """
    plt.figure(figsize=(10, 12))
    plt.title("Feature Importances")

    # One bar slot per ranked feature.
    bar_slots = range(len(sorted_idx))
    bar_heights = feature_importances[sorted_idx]
    plt.bar(bar_slots, bar_heights, align="center")

    tick_labels = [feature_names[position] for position in sorted_idx]
    plt.xticks(bar_slots, tick_labels, rotation=45)

    plt.xlabel("Features")
    plt.ylabel("Relative Importance")
    plt.show()
| 288 | + |
def auto_phase_assign(self, Slices_file, Vox_ratios_file,
                      initial_guess_phases, plot=False,
                      print_importance=False,
                      modified_comp_analysis=None,
                      n_trunc_spec=None):
    """Rank features by their importance for separating GMM-assigned clusters.

    The voxel ratio table is clustered with a Gaussian mixture model, the
    resulting cluster labels are used to train a random forest, and the
    forest's feature importances are sorted in descending order.

    Parameters
    ----------
    Slices_file
        HDF5 file containing the group ``"group_xyz_Da_spec"``. The second
        attribute value of that group is used as the list of feature names
        (presumably the chemical species -- confirm against the writer).
    Vox_ratios_file
        HDF5 file containing the dataset ``"vox_ratios"``. The first
        file-level attribute value is used as the column names; the
        columns ``'Total_no'`` and ``'vox'`` are dropped before clustering.
    initial_guess_phases
        Number of Gaussian mixture components for the initial clustering.
    plot
        If truthy, show the feature importances as a bar chart.
    print_importance
        If truthy, print each feature name with its importance, most
        important first.
    modified_comp_analysis
        If truthy, fit Gaussian mixtures with 1 to 10 components on the
        ``n_trunc_spec`` most important features and draw the BIC curve
        on the current matplotlib figure (``plt.show()`` is not called
        for this curve).
    n_trunc_spec
        Number of top-ranked features kept for the BIC analysis. ``None``
        keeps all of them.

    Returns
    -------
    Indices of the feature columns sorted by descending importance.

    Raises
    ------
    ValueError
        If the number of feature names read from ``Slices_file`` differs
        from the number of feature columns left after dropping
        ``'Total_no'`` and ``'vox'``.
    """
    with h5py.File(Slices_file, "r") as hdfr:
        group1 = hdfr.get("group_xyz_Da_spec")
        # NOTE(review): the name list is picked by attribute position, not by
        # attribute name -- this depends on the order the attributes are stored.
        Chem_list = list(list(group1.attrs.values())[1])

    with h5py.File(Vox_ratios_file, "r") as hdfr:
        Ratios = np.array(hdfr.get("vox_ratios"))
        Ratios_colomns = list(list(hdfr.attrs.values())[0])

    Ratios = pd.DataFrame(data=Ratios, columns=Ratios_colomns)
    X = Ratios.drop(['Total_no', 'vox'], axis=1)

    # Feature names are looked up by column index further down, so both
    # sides must have the same length.
    if len(Chem_list) != X.shape[1]:
        raise ValueError(
            f"Slices_file provides {len(Chem_list)} feature names but "
            f"Vox_ratios_file has {X.shape[1]} feature columns"
        )

    # Cluster labels from the mixture model serve as targets for the forest.
    gm = GaussianMixture(n_components=initial_guess_phases, max_iter=100000, verbose=0)
    gm.fit(X)
    y_pred = gm.predict(X)

    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X.values, y_pred)

    feature_importances = rf.feature_importances_
    # Descending order: most important feature first.
    sorted_idx = feature_importances.argsort()[::-1]
    feature_names = Chem_list

    if plot:
        self.plot_relative_importance(feature_importances, feature_names, sorted_idx)

    if print_importance:
        for index in sorted_idx:
            print(f" {feature_names[index]} - Importance: {feature_importances[index]}")

    # BIC analysis on the composition restricted to the top-ranked features.
    if modified_comp_analysis:
        # Reorder columns by importance, then keep the first n_trunc_spec.
        X_modified = X.values[:, sorted_idx][:, 0:n_trunc_spec]
        n_clusters = list(range(1, 11))
        bics = []
        for n_cluster in tqdm(n_clusters):
            gm = GaussianMixture(n_components=n_cluster, verbose=0)
            gm.fit(X_modified)
            bics.append(gm.bic(X_modified))

        plt.plot(n_clusters, bics, "-o", label="BIC")

    return sorted_idx
| 368 | + |
| 369 | + |
0 commit comments