diff --git a/aeon/clustering/_clara.py b/aeon/clustering/_clara.py
index dcc0e7a912..66da2e9920 100644
--- a/aeon/clustering/_clara.py
+++ b/aeon/clustering/_clara.py
@@ -30,8 +30,8 @@ class TimeSeriesCLARA(BaseClusterer):
     n_clusters : int, default=8
         The number of clusters to form as well as the number of centroids to
         generate.
-    init_algorithm : str or np.ndarray, default='random'
-        Method for initializing cluster centers. Any of the following are valid:
+    init : str or np.ndarray, default='random'
+        Method for initialising cluster centers. Any of the following are valid:
         ['kmedoids++', 'random', 'first']. Random is the default as it is very
         fast and it was found in [2] to perform about as well as the other
         methods.
@@ -118,7 +118,7 @@ class TimeSeriesCLARA(BaseClusterer):
     def __init__(
         self,
         n_clusters: int = 8,
-        init_algorithm: Union[str, np.ndarray] = "random",
+        init: Union[str, np.ndarray] = "random",
         distance: Union[str, Callable] = "msm",
         n_samples: Optional[int] = None,
         n_sampling_iters: int = 10,
@@ -129,8 +129,8 @@ def __init__(
         random_state: Optional[Union[int, RandomState]] = None,
         distance_params: Optional[dict] = None,
     ):
-        self.init_algorithm = init_algorithm
         self.distance = distance
+        self.init = init
         self.n_init = n_init
         self.max_iter = max_iter
         self.tol = tol
@@ -175,7 +175,7 @@ def _fit(self, X: np.ndarray, y=None):
             )
             pam = TimeSeriesKMedoids(
                 n_clusters=self.n_clusters,
-                init_algorithm=self.init_algorithm,
+                init=self.init,
                 distance=self.distance,
                 n_init=self.n_init,
                 max_iter=self.max_iter,
@@ -228,7 +228,7 @@ def _get_test_params(cls, parameter_set="default"):
         """
         return {
             "n_clusters": 2,
-            "init_algorithm": "random",
+            "init": "random",
             "distance": "euclidean",
             "n_init": 1,
             "max_iter": 1,
diff --git a/aeon/clustering/_clarans.py b/aeon/clustering/_clarans.py
index f1c9eff87b..71ca1a9284 100644
--- a/aeon/clustering/_clarans.py
+++ b/aeon/clustering/_clarans.py
@@ -31,8 +31,8 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
     n_clusters : int, default=8
         The number of clusters to form as well as the number of centroids to
         generate.
-    init_algorithm : str or np.ndarray, default='random'
-        Method for initializing cluster centers. Any of the following are valid:
+    init : str or np.ndarray, default='random'
+        Method for initialising cluster centers. Any of the following are valid:
         ['kmedoids++', 'random', 'first']. Random is the default as it is very
         fast and it was found in [2] to perform about as well as the other
         methods.
@@ -104,7 +104,7 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
     def __init__(
         self,
         n_clusters: int = 8,
-        init_algorithm: Union[str, np.ndarray] = "random",
+        init: Union[str, np.ndarray] = "random",
         distance: Union[str, Callable] = "msm",
         max_neighbours: Optional[int] = None,
         n_init: int = 10,
@@ -116,7 +116,7 @@ def __init__(

         super().__init__(
             n_clusters=n_clusters,
-            init_algorithm=init_algorithm,
+            init=init,
             distance=distance,
             n_init=n_init,
             verbose=verbose,
@@ -127,10 +127,10 @@ def __init__(
     def _fit_one_init(self, X: np.ndarray, max_neighbours: int):
         j = 0
         X_indexes = np.arange(X.shape[0], dtype=int)
-        if isinstance(self._init_algorithm, Callable):
-            best_medoids = self._init_algorithm(X)
+        if isinstance(self._init, Callable):
+            best_medoids = self._init(X)
         else:
-            best_medoids = self._init_algorithm
+            best_medoids = self._init
         best_non_medoids = np.setdiff1d(X_indexes, best_medoids)
         best_cost = (
             self._compute_pairwise(X, best_non_medoids, best_medoids).min(axis=1).sum()
@@ -203,7 +203,7 @@ def _get_test_params(cls, parameter_set="default"):
         """
         return {
             "n_clusters": 2,
-            "init_algorithm": "random",
+            "init": "random",
             "distance": "euclidean",
             "max_neighbours": None,
             "n_init": 1,
diff --git a/aeon/clustering/_k_medoids.py b/aeon/clustering/_k_medoids.py
index ea8e860afc..a54220cec2 100644
--- a/aeon/clustering/_k_medoids.py
+++ b/aeon/clustering/_k_medoids.py
@@ -44,8 +44,8 @@ class TimeSeriesKMedoids(BaseClusterer):
     ----------
     n_clusters : int, default=8
         The number of clusters to form as well as the number of centroids to generate.
-    init_algorithm : str or np.ndarray, default='random'
-        Method for initializing cluster centers. Any of the following are valid:
+    init : str or np.ndarray, default='random'
+        Method for initialising cluster centers. Any of the following are valid:
         ['kmedoids++', 'random', 'first']. Random is the default as it is very
         fast and it was found in [2] to perform about as well as the other
         methods.
@@ -152,7 +152,7 @@ class TimeSeriesKMedoids(BaseClusterer):
     def __init__(
         self,
         n_clusters: int = 8,
-        init_algorithm: Union[str, np.ndarray] = "random",
+        init: Union[str, np.ndarray] = "random",
         distance: Union[str, Callable] = "msm",
         method: str = "pam",
         n_init: int = 10,
@@ -162,8 +162,8 @@ def __init__(
         random_state: Optional[Union[int, RandomState]] = None,
         distance_params: Optional[dict] = None,
     ):
-        self.init_algorithm = init_algorithm
         self.distance = distance
+        self.init = init
         self.n_init = n_init
         self.max_iter = max_iter
         self.tol = tol
@@ -179,7 +179,7 @@ def __init__(
         self.n_iter_ = 0

         self._random_state = None
-        self._init_algorithm = None
+        self._init = None
         self._distance_cache = None
         self._distance_callable = None
         self._fit_method = None
@@ -267,10 +267,10 @@ def _pam_fit(self, X: np.ndarray):
         old_inertia = np.inf
         n_cases = X.shape[0]

-        if isinstance(self._init_algorithm, Callable):
-            medoids_idxs = self._init_algorithm(X)
+        if isinstance(self._init, Callable):
+            medoids_idxs = self._init(X)
         else:
-            medoids_idxs = self._init_algorithm
+            medoids_idxs = self._init
         not_medoid_idxs = np.arange(n_cases, dtype=int)
         distance_matrix = self._compute_pairwise(X, not_medoid_idxs, not_medoid_idxs)
         distance_closest_medoid, distance_second_closest_medoid = np.sort(
@@ -388,9 +388,9 @@ def _compute_optimal_swaps(
         return None

     def _alternate_fit(self, X) -> tuple[np.ndarray, np.ndarray, float, int]:
-        cluster_center_indexes = self._init_algorithm
-        if isinstance(self._init_algorithm, Callable):
-            cluster_center_indexes = self._init_algorithm(X)
+        cluster_center_indexes = self._init
+        if isinstance(self._init, Callable):
+            cluster_center_indexes = self._init(X)
         old_inertia = np.inf
         old_indexes = None
         for i in range(self.max_iter):
@@ -428,24 +428,21 @@ def _assign_clusters(
     def _check_params(self, X: np.ndarray) -> None:
         self._random_state = check_random_state(self.random_state)

-        if isinstance(self.init_algorithm, str):
-            if self.init_algorithm == "random":
-                self._init_algorithm = self._random_center_initializer
-            elif self.init_algorithm == "kmedoids++":
-                self._init_algorithm = self._kmedoids_plus_plus_center_initializer
-            elif self.init_algorithm == "first":
-                self._init_algorithm = self._first_center_initializer
-            elif self.init_algorithm == "build":
-                self._init_algorithm = self._pam_build_center_initializer
+        if isinstance(self.init, str):
+            if self.init == "random":
+                self._init = self._random_center_initializer
+            elif self.init == "kmedoids++":
+                self._init = self._kmedoids_plus_plus_center_initializer
+            elif self.init == "first":
+                self._init = self._first_center_initializer
+            elif self.init == "build":
+                self._init = self._pam_build_center_initializer
         else:
-            if (
-                isinstance(self.init_algorithm, np.ndarray)
-                and len(self.init_algorithm) == self.n_clusters
-            ):
-                self._init_algorithm = self.init_algorithm
+            if isinstance(self.init, np.ndarray) and len(self.init) == self.n_clusters:
+                self._init = self.init
             else:
                 raise ValueError(
-                    f"The value provided for init_algorithm: {self.init_algorithm} is "
+                    f"The value provided for init: {self.init} is "
                     f"invalid. The following are a list of valid init algorithms "
                     f"strings: random, kmedoids++, first. You can also pass a"
                     f"np.ndarray of size (n_clusters, n_channels, n_timepoints)"
@@ -469,7 +466,7 @@ def _check_params(self, X: np.ndarray) -> None:
         else:
             raise ValueError(f"method {self.method} is not supported")

-        if isinstance(self.init_algorithm, str) and self.init_algorithm == "build":
+        if isinstance(self.init, str) and self.init == "build":
             if self.n_init != 10 and self.n_init > 1:
                 warnings.warn(
                     "When using build n_init does not need to be greater than 1. "
@@ -558,7 +555,7 @@ def _get_test_params(cls, parameter_set="default"):
         """
         return {
             "n_clusters": 2,
-            "init_algorithm": "random",
+            "init": "random",
             "distance": "euclidean",
             "n_init": 1,
             "max_iter": 1,
diff --git a/aeon/clustering/_k_shape.py b/aeon/clustering/_k_shape.py
index ad94a9f10c..aa8d8a3b64 100644
--- a/aeon/clustering/_k_shape.py
+++ b/aeon/clustering/_k_shape.py
@@ -16,8 +16,8 @@ class TimeSeriesKShape(BaseClusterer):
     n_clusters: int, default=8
         The number of clusters to form as well as the number of centroids to
         generate.
-    init_algorithm: str or np.ndarray, default='random'
-        Method for initializing cluster centres. Any of the following are valid:
+    init: str or np.ndarray, default='random'
+        Method for initialising cluster centres. Any of the following are valid:
         ['random']. Or a np.ndarray of shape (n_clusters, n_channels, n_timepoints)
         and gives the initial cluster centres.
     n_init: int, default=10
@@ -76,15 +76,15 @@ class TimeSeriesKShape(BaseClusterer):
     def __init__(
         self,
         n_clusters: int = 8,
-        init_algorithm: Union[str, np.ndarray] = "random",
+        init: Union[str, np.ndarray] = "random",
         n_init: int = 10,
         max_iter: int = 300,
         tol: float = 1e-4,
         verbose: bool = False,
         random_state: Optional[Union[int, RandomState]] = None,
     ):
-        self.init_algorithm = init_algorithm
         self.n_init = n_init
+        self.init = init
         self.max_iter = max_iter
         self.tol = tol
         self.verbose = verbose
@@ -124,7 +124,7 @@ def _fit(self, X, y=None):
             random_state=self.random_state,
             n_init=self.n_init,
             verbose=self.verbose,
-            init=self.init_algorithm,
+            init=self.init,
         )

         _X = X.swapaxes(1, 2)
@@ -173,7 +173,7 @@ def _get_test_params(cls, parameter_set="default"):
         """
         return {
             "n_clusters": 2,
-            "init_algorithm": "random",
+            "init": "random",
             "n_init": 1,
             "max_iter": 1,
             "tol": 1e-4,
diff --git a/aeon/clustering/tests/test_clara.py b/aeon/clustering/tests/test_clara.py
index ebfb3dada5..81d5e8920e 100644
--- a/aeon/clustering/tests/test_clara.py
+++ b/aeon/clustering/tests/test_clara.py
@@ -23,7 +23,7 @@ def test_clara_uni():
         n_samples=10,
         n_init=2,
         max_iter=5,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         n_clusters=2,
     )
@@ -68,7 +68,7 @@ def test_clara_multi():
         n_samples=10,
         n_init=2,
         max_iter=5,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         n_clusters=2,
     )
diff --git a/aeon/clustering/tests/test_clarans.py b/aeon/clustering/tests/test_clarans.py
index 03e250bdf3..a1da285cf3 100644
--- a/aeon/clustering/tests/test_clarans.py
+++ b/aeon/clustering/tests/test_clarans.py
@@ -24,7 +24,7 @@ def test_clarans_uni():
     clarans = TimeSeriesCLARANS(
         random_state=1,
         n_init=2,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         n_clusters=2,
     )
@@ -67,7 +67,7 @@ def test_clara_multi():
     clarans = TimeSeriesCLARANS(
         random_state=1,
         n_init=2,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         n_clusters=2,
     )
@@ -106,7 +106,7 @@ def test_medoids_init():
     kmedoids = TimeSeriesCLARANS(
         random_state=1,
         n_init=1,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         n_clusters=num_clusters,
     )
@@ -131,7 +131,7 @@ def test_medoids_init():
     kmedoids = TimeSeriesCLARANS(
         random_state=1,
         n_init=1,
-        init_algorithm=custom_init_centres,
+        init=custom_init_centres,
         distance="euclidean",
         n_clusters=num_clusters,
     )
diff --git a/aeon/clustering/tests/test_k_medoids.py b/aeon/clustering/tests/test_k_medoids.py
index 57bd7c1039..0fea3ead19 100644
--- a/aeon/clustering/tests/test_k_medoids.py
+++ b/aeon/clustering/tests/test_k_medoids.py
@@ -43,7 +43,7 @@ def _pam_uni_medoids(X_train, y_train, X_test, y_test):
         random_state=1,
         n_init=2,
         max_iter=5,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         method="pam",
     )
@@ -70,7 +70,7 @@ def _alternate_uni_medoids(X_train, y_train, X_test, y_test):
         n_init=2,
         max_iter=5,
         method="alternate",
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
     )
     train_medoids_result = kmedoids.fit_predict(X_train)
@@ -95,7 +95,7 @@ def _pam_multi_medoids(X_train, y_train, X_test, y_test):
         random_state=1,
         n_init=2,
         max_iter=5,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         method="pam",
     )
@@ -121,7 +121,7 @@ def _alternate_multi_medoids(X_train, y_train, X_test, y_test):
         random_state=1,
         n_init=2,
         max_iter=5,
-        init_algorithm="first",
+        init="first",
         method="alternate",
         distance="euclidean",
     )
@@ -169,7 +169,7 @@ def test_medoids_init():
         random_state=1,
         n_init=1,
         max_iter=5,
-        init_algorithm="first",
+        init="first",
         distance="euclidean",
         n_clusters=num_clusters,
     )
@@ -194,7 +194,7 @@ def test_medoids_init():
         random_state=1,
         n_init=1,
         max_iter=5,
-        init_algorithm=custom_init_centres,
+        init=custom_init_centres,
         distance="euclidean",
         n_clusters=num_clusters,
     )
@@ -209,7 +209,7 @@ def _get_model_centres(data, distance, method="pam", distance_params=None):
         method=method,
         n_init=2,
         n_clusters=2,
-        init_algorithm="random",
+        init="random",
         distance=distance,
         distance_params=distance_params,
     )
diff --git a/examples/clustering/partitional_clustering.ipynb b/examples/clustering/partitional_clustering.ipynb
index 817f66bd24..5fd9ac1eec 100644
--- a/examples/clustering/partitional_clustering.ipynb
+++ b/examples/clustering/partitional_clustering.ipynb
@@ -1374,7 +1374,7 @@
    "source": [
     "k_medoids = TimeSeriesKMedoids(\n",
     "    n_clusters=2,  # Number of desired centers\n",
-    "    init_algorithm=\"random\",  # Center initialisation technique\n",
+    "    init=\"random\",  # Center initialisation technique\n",
     "    max_iter=10,  # Maximum number of iterations for refinement on training set\n",
     "    verbose=False,  # Verbose\n",
     "    distance=\"dtw\",  # Distance to use\n",
@@ -1458,7 +1458,7 @@
    "source": [
     "k_medoids = TimeSeriesKMedoids(\n",
     "    n_clusters=2,  # Number of desired centers\n",
-    "    init_algorithm=\"random\",  # Center initialisation technique\n",
+    "    init=\"random\",  # Center initialisation technique\n",
     "    max_iter=10,  # Maximum number of iterations for refinement on training set\n",
     "    distance=\"msm\",  # Distance to use\n",
     "    random_state=1,\n",
@@ -1527,7 +1527,7 @@
    "source": [
     "k_medoids = TimeSeriesKMedoids(\n",
     "    n_clusters=2,  # Number of desired centers\n",
-    "    init_algorithm=\"random\",  # Center initialisation technique\n",
+    "    init=\"random\",  # Center initialisation technique\n",
     "    max_iter=10,  # Maximum number of iterations for refinement on training set\n",
     "    distance=\"msm\",  # Distance to use\n",
     "    random_state=1,\n",
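
For reference, a minimal usage sketch of the renamed keyword argument, mirroring the notebook cell updated above. The random example data and variable names here are illustrative only and are not part of this change.

import numpy as np

from aeon.clustering import TimeSeriesKMedoids

rng = np.random.default_rng(1)
X = rng.random((20, 1, 50))  # 20 univariate series of length 50: (n_cases, n_channels, n_timepoints)

k_medoids = TimeSeriesKMedoids(
    n_clusters=2,  # Number of desired centers
    init="random",  # Center initialisation technique (formerly `init_algorithm`)
    max_iter=10,  # Maximum number of iterations for refinement on training set
    distance="msm",  # Distance to use
    random_state=1,
)
labels = k_medoids.fit_predict(X)  # cluster assignment for each series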