Skip to content

Commit

Permalink
PRv0.6.0 (#5)
Browse files Browse the repository at this point in the history
* Implemented Mini-Batch K-Means and Birch clustering algorithms (scikit-learn).

* Implemented Agglomerative Clustering (scikit-learn).

* Fixed cluster labels mapping dictionary checking procedure.

* Implemented Agglomerative Clustering (scipy).

* Implemented Birch clustering algorithm (pyclustering).

* Implemented Cure clustering algorithm (pyclustering).

* Implemented K-Means clustering algorithm (pyclustering).

* Small documentation fixes and reordered clustering algorithm labels.

* Implemented X-Means clustering algorithm (pyclustering).

* Implemented Agglomerative clustering algorithm (fastcluster) and reordered clustering algorithm labels.

* Implemented two data standardization algorithms to standardize the clustering global data matrix.

* Included standardization algorithm as an optional parameter in CRATE's input data file.

* Changed default self-consistent scheme and fixed CRATE's input data file documentation.

* Updated CRATE's input data file to include implemented clustering algorithms.
  • Loading branch information
BernardoFerreira authored Oct 12, 2020
1 parent 0638094 commit 23a5167
Show file tree
Hide file tree
Showing 5 changed files with 933 additions and 81 deletions.
89 changes: 89 additions & 0 deletions clustering/clusteringdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import copy
# Defining abstract base classes
from abc import ABC, abstractmethod
# Data preprocessing tools
import sklearn.preprocessing as skpp
# Display messages
import ioput.info as info
# RVE response database
Expand All @@ -35,6 +37,7 @@ def set_clustering_data(dirs_dict, problem_dict, mat_dict, rg_dict, clst_dict):
n_voxels_dims = rg_dict['n_voxels_dims']
# Get clustering data
clustering_solution_method = clst_dict['clustering_solution_method']
standardization_method = clst_dict['standardization_method']
clustering_scheme = clst_dict['clustering_scheme']
# Compute total number of voxels
n_voxels = np.prod(n_voxels_dims)
Expand Down Expand Up @@ -65,6 +68,19 @@ def set_clustering_data(dirs_dict, problem_dict, mat_dict, rg_dict, clst_dict):
info.displayinfo('5', 'Computing cluster analysis global data matrix...')
# Compute clustering global data matrix containing all clustering features
clustering_data.set_global_data_matrix(rve_elastic_database.rve_global_response)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
info.displayinfo('5', 'Standardizing cluster analysis global data matrix...')
# Instantiate standardization algorithm
if standardization_method == 1:
standardizer = MinMaxScaler()
elif standardization_method == 2:
standardizer = StandardScaler()
else:
raise RuntimeError('Unknown standardization method.')
# Standardize clustering global data matrix
clustering_data.global_data_matrix = \
standardizer.get_standardized_data_matrix(clustering_data.global_data_matrix)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Store clustering global data matrix
clst_dict['clst_quantities'] = clustering_data.global_data_matrix
#
Expand Down Expand Up @@ -329,3 +345,76 @@ class StrainConcentrationTensor(FeatureAlgorithm):
def get_feature_data_matrix(self, rve_response):
    '''Return a deep copy of the provided RVE response as the feature data matrix.'''
    # A deep copy is returned so that the result shares no references with the input
    return copy.deepcopy(rve_response)
#
# Data standardization algorithms
# ==========================================================================================
class Standardizer(ABC):
    '''Interface to be implemented by every data standardization algorithm.'''
    @abstractmethod
    def __init__(self):
        '''Standardization algorithm constructor (defined by each subclass).'''
    # --------------------------------------------------------------------------------------
    @abstractmethod
    def get_standardized_data_matrix(self, data_matrix):
        '''Return the standardized counterpart of the provided data matrix.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
# ------------------------------------------------------------------------------------------
class MinMaxScaler(Standardizer):
    '''Rescale each feature linearly so that it spans a prescribed min-max range.

    Attributes
    ----------
    _feature_range : tuple(min, max), default=(0, 1)
        Desired range of transformed data.

    Notes
    -----
    The scaling itself is delegated to the Min-Max scaler of scikit-learn
    (https://scikit-learn.org), where further information can be found.
    '''
    def __init__(self, feature_range=(0, 1)):
        '''Standardization algorithm constructor.

        Parameters
        ----------
        feature_range : tuple(min, max), default=(0, 1)
            Desired range of transformed data.
        '''
        self._feature_range = feature_range
    # --------------------------------------------------------------------------------------
    def get_standardized_data_matrix(self, data_matrix):
        '''Fit the min-max scaling on the provided data matrix and return it transformed.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
        # Instantiate the scikit-learn scaler. copy=False requests in-place scaling, so
        # the array provided by the caller may be overwritten
        scaler = skpp.MinMaxScaler(feature_range=self._feature_range, copy=False)
        # Fit scaling parameters and transform data in a single step
        return scaler.fit_transform(data_matrix)
# ------------------------------------------------------------------------------------------
class StandardScaler(Standardizer):
    '''Center each feature on its mean and scale it to unit variance (standard normal
    distribution).

    Attributes
    ----------
    _feature_range : tuple(min, max), default=(0, 1)
        Value received by the constructor. It is stored but never read by the
        standardization performed in this class.

    Notes
    -----
    The scaling itself is delegated to the Standard scaler of scikit-learn
    (https://scikit-learn.org), where further information can be found.
    '''
    def __init__(self, feature_range=(0, 1)):
        '''Standardization algorithm constructor.

        Parameters
        ----------
        feature_range : tuple(min, max), default=(0, 1)
            Stored as given; it has no effect on the standardized data.
        '''
        self._feature_range = feature_range
    # --------------------------------------------------------------------------------------
    def get_standardized_data_matrix(self, data_matrix):
        '''Fit the standard scaling on the provided data matrix and return it transformed.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
        # Instantiate the scikit-learn scaler with both centering and scaling enabled.
        # copy=False requests in-place scaling, so the array provided by the caller may
        # be overwritten
        scaler = skpp.StandardScaler(with_mean=True, with_std=True, copy=False)
        # Fit scaling parameters and transform data in a single step
        return scaler.fit_transform(data_matrix)
Loading

0 comments on commit 23a5167

Please sign in to comment.