Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MNT] cluster init_algorithm removed across all clusterers #2392

Merged
merged 4 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions aeon/clustering/_clara.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class TimeSeriesCLARA(BaseClusterer):
n_clusters : int, default=8
The number of clusters to form as well as the number of
centroids to generate.
init_algorithm : str or np.ndarray, default='random'
Method for initializing cluster centers. Any of the following are valid:
init : str or np.ndarray, default='random'
Method for initialising cluster centers. Any of the following are valid:
['kmedoids++', 'random', 'first'].
Random is the default as it is very fast and it was found in [2] to
perform about as well as the other methods.
Expand Down Expand Up @@ -118,7 +118,7 @@ class TimeSeriesCLARA(BaseClusterer):
def __init__(
self,
n_clusters: int = 8,
init_algorithm: Union[str, np.ndarray] = "random",
init: Union[str, np.ndarray] = "random",
distance: Union[str, Callable] = "msm",
n_samples: Optional[int] = None,
n_sampling_iters: int = 10,
Expand All @@ -129,8 +129,8 @@ def __init__(
random_state: Optional[Union[int, RandomState]] = None,
distance_params: Optional[dict] = None,
):
self.init_algorithm = init_algorithm
self.distance = distance
self.init = init
self.n_init = n_init
self.max_iter = max_iter
self.tol = tol
Expand Down Expand Up @@ -175,7 +175,7 @@ def _fit(self, X: np.ndarray, y=None):
)
pam = TimeSeriesKMedoids(
n_clusters=self.n_clusters,
init_algorithm=self.init_algorithm,
init=self.init,
distance=self.distance,
n_init=self.n_init,
max_iter=self.max_iter,
Expand Down Expand Up @@ -228,7 +228,7 @@ def _get_test_params(cls, parameter_set="default"):
"""
return {
"n_clusters": 2,
"init_algorithm": "random",
"init": "random",
"distance": "euclidean",
"n_init": 1,
"max_iter": 1,
Expand Down
16 changes: 8 additions & 8 deletions aeon/clustering/_clarans.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
n_clusters : int, default=8
The number of clusters to form as well as the number of
centroids to generate.
init_algorithm : str or np.ndarray, default='random'
Method for initializing cluster centers. Any of the following are valid:
init : str or np.ndarray, default='random'
Method for initialising cluster centers. Any of the following are valid:
['kmedoids++', 'random', 'first'].
Random is the default as it is very fast and it was found in [2] to
perform about as well as the other methods.
Expand Down Expand Up @@ -104,7 +104,7 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
def __init__(
self,
n_clusters: int = 8,
init_algorithm: Union[str, np.ndarray] = "random",
init: Union[str, np.ndarray] = "random",
distance: Union[str, Callable] = "msm",
max_neighbours: Optional[int] = None,
n_init: int = 10,
Expand All @@ -116,7 +116,7 @@ def __init__(

super().__init__(
n_clusters=n_clusters,
init_algorithm=init_algorithm,
init=init,
distance=distance,
n_init=n_init,
verbose=verbose,
Expand All @@ -127,10 +127,10 @@ def __init__(
def _fit_one_init(self, X: np.ndarray, max_neighbours: int):
j = 0
X_indexes = np.arange(X.shape[0], dtype=int)
if isinstance(self._init_algorithm, Callable):
best_medoids = self._init_algorithm(X)
if isinstance(self._init, Callable):
best_medoids = self._init(X)
else:
best_medoids = self._init_algorithm
best_medoids = self._init
best_non_medoids = np.setdiff1d(X_indexes, best_medoids)
best_cost = (
self._compute_pairwise(X, best_non_medoids, best_medoids).min(axis=1).sum()
Expand Down Expand Up @@ -203,7 +203,7 @@ def _get_test_params(cls, parameter_set="default"):
"""
return {
"n_clusters": 2,
"init_algorithm": "random",
"init": "random",
"distance": "euclidean",
"max_neighbours": None,
"n_init": 1,
Expand Down
53 changes: 25 additions & 28 deletions aeon/clustering/_k_medoids.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class TimeSeriesKMedoids(BaseClusterer):
----------
n_clusters : int, default=8
The number of clusters to form as well as the number of centroids to generate.
init_algorithm : str or np.ndarray, default='random'
Method for initializing cluster centers. Any of the following are valid:
init : str or np.ndarray, default='random'
Method for initialising cluster centers. Any of the following are valid:
['kmedoids++', 'random', 'first'].
Random is the default as it is very fast and it was found in [2] to
perform about as well as the other methods.
Expand Down Expand Up @@ -152,7 +152,7 @@ class TimeSeriesKMedoids(BaseClusterer):
def __init__(
self,
n_clusters: int = 8,
init_algorithm: Union[str, np.ndarray] = "random",
init: Union[str, np.ndarray] = "random",
distance: Union[str, Callable] = "msm",
method: str = "pam",
n_init: int = 10,
Expand All @@ -162,8 +162,8 @@ def __init__(
random_state: Optional[Union[int, RandomState]] = None,
distance_params: Optional[dict] = None,
):
self.init_algorithm = init_algorithm
self.distance = distance
self.init = init
self.n_init = n_init
self.max_iter = max_iter
self.tol = tol
Expand All @@ -179,7 +179,7 @@ def __init__(
self.n_iter_ = 0

self._random_state = None
self._init_algorithm = None
self._init = None
self._distance_cache = None
self._distance_callable = None
self._fit_method = None
Expand Down Expand Up @@ -267,10 +267,10 @@ def _pam_fit(self, X: np.ndarray):
old_inertia = np.inf
n_cases = X.shape[0]

if isinstance(self._init_algorithm, Callable):
medoids_idxs = self._init_algorithm(X)
if isinstance(self._init, Callable):
medoids_idxs = self._init(X)
else:
medoids_idxs = self._init_algorithm
medoids_idxs = self._init
not_medoid_idxs = np.arange(n_cases, dtype=int)
distance_matrix = self._compute_pairwise(X, not_medoid_idxs, not_medoid_idxs)
distance_closest_medoid, distance_second_closest_medoid = np.sort(
Expand Down Expand Up @@ -388,9 +388,9 @@ def _compute_optimal_swaps(
return None

def _alternate_fit(self, X) -> tuple[np.ndarray, np.ndarray, float, int]:
cluster_center_indexes = self._init_algorithm
if isinstance(self._init_algorithm, Callable):
cluster_center_indexes = self._init_algorithm(X)
cluster_center_indexes = self._init
if isinstance(self._init, Callable):
cluster_center_indexes = self._init(X)
old_inertia = np.inf
old_indexes = None
for i in range(self.max_iter):
Expand Down Expand Up @@ -428,24 +428,21 @@ def _assign_clusters(
def _check_params(self, X: np.ndarray) -> None:
self._random_state = check_random_state(self.random_state)

if isinstance(self.init_algorithm, str):
if self.init_algorithm == "random":
self._init_algorithm = self._random_center_initializer
elif self.init_algorithm == "kmedoids++":
self._init_algorithm = self._kmedoids_plus_plus_center_initializer
elif self.init_algorithm == "first":
self._init_algorithm = self._first_center_initializer
elif self.init_algorithm == "build":
self._init_algorithm = self._pam_build_center_initializer
if isinstance(self.init, str):
if self.init == "random":
self._init = self._random_center_initializer
elif self.init == "kmedoids++":
self._init = self._kmedoids_plus_plus_center_initializer
elif self.init == "first":
self._init = self._first_center_initializer
elif self.init == "build":
self._init = self._pam_build_center_initializer
else:
if (
isinstance(self.init_algorithm, np.ndarray)
and len(self.init_algorithm) == self.n_clusters
):
self._init_algorithm = self.init_algorithm
if isinstance(self.init, np.ndarray) and len(self.init) == self.n_clusters:
self._init = self.init
else:
raise ValueError(
f"The value provided for init_algorithm: {self.init_algorithm} is "
f"The value provided for init: {self.init} is "
f"invalid. The following are a list of valid init algorithms "
f"strings: random, kmedoids++, first. You can also pass a"
f"np.ndarray of size (n_clusters, n_channels, n_timepoints)"
Expand All @@ -469,7 +466,7 @@ def _check_params(self, X: np.ndarray) -> None:
else:
raise ValueError(f"method {self.method} is not supported")

if isinstance(self.init_algorithm, str) and self.init_algorithm == "build":
if isinstance(self.init, str) and self.init == "build":
if self.n_init != 10 and self.n_init > 1:
warnings.warn(
"When using build n_init does not need to be greater than 1. "
Expand Down Expand Up @@ -558,7 +555,7 @@ def _get_test_params(cls, parameter_set="default"):
"""
return {
"n_clusters": 2,
"init_algorithm": "random",
"init": "random",
"distance": "euclidean",
"n_init": 1,
"max_iter": 1,
Expand Down
12 changes: 6 additions & 6 deletions aeon/clustering/_k_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class TimeSeriesKShape(BaseClusterer):
n_clusters: int, default=8
The number of clusters to form as well as the number of
centroids to generate.
init_algorithm: str or np.ndarray, default='random'
Method for initializing cluster centres. Any of the following are valid:
init: str or np.ndarray, default='random'
Method for initialising cluster centres. Any of the following are valid:
['random']. Alternatively, an np.ndarray of shape (n_clusters, n_channels,
n_timepoints) giving the initial cluster centres.
n_init: int, default=10
Expand Down Expand Up @@ -76,15 +76,15 @@ class TimeSeriesKShape(BaseClusterer):
def __init__(
self,
n_clusters: int = 8,
init_algorithm: Union[str, np.ndarray] = "random",
init: Union[str, np.ndarray] = "random",
n_init: int = 10,
max_iter: int = 300,
tol: float = 1e-4,
verbose: bool = False,
random_state: Optional[Union[int, RandomState]] = None,
):
self.init_algorithm = init_algorithm
self.n_init = n_init
self.init = init
self.max_iter = max_iter
self.tol = tol
self.verbose = verbose
Expand Down Expand Up @@ -124,7 +124,7 @@ def _fit(self, X, y=None):
random_state=self.random_state,
n_init=self.n_init,
verbose=self.verbose,
init=self.init_algorithm,
init=self.init,
)

_X = X.swapaxes(1, 2)
Expand Down Expand Up @@ -173,7 +173,7 @@ def _get_test_params(cls, parameter_set="default"):
"""
return {
"n_clusters": 2,
"init_algorithm": "random",
"init": "random",
"n_init": 1,
"max_iter": 1,
"tol": 1e-4,
Expand Down
4 changes: 2 additions & 2 deletions aeon/clustering/tests/test_clara.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_clara_uni():
n_samples=10,
n_init=2,
max_iter=5,
init_algorithm="first",
init="first",
distance="euclidean",
n_clusters=2,
)
Expand Down Expand Up @@ -68,7 +68,7 @@ def test_clara_multi():
n_samples=10,
n_init=2,
max_iter=5,
init_algorithm="first",
init="first",
distance="euclidean",
n_clusters=2,
)
Expand Down
8 changes: 4 additions & 4 deletions aeon/clustering/tests/test_clarans.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_clarans_uni():
clarans = TimeSeriesCLARANS(
random_state=1,
n_init=2,
init_algorithm="first",
init="first",
distance="euclidean",
n_clusters=2,
)
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_clara_multi():
clarans = TimeSeriesCLARANS(
random_state=1,
n_init=2,
init_algorithm="first",
init="first",
distance="euclidean",
n_clusters=2,
)
Expand Down Expand Up @@ -106,7 +106,7 @@ def test_medoids_init():
kmedoids = TimeSeriesCLARANS(
random_state=1,
n_init=1,
init_algorithm="first",
init="first",
distance="euclidean",
n_clusters=num_clusters,
)
Expand All @@ -131,7 +131,7 @@ def test_medoids_init():
kmedoids = TimeSeriesCLARANS(
random_state=1,
n_init=1,
init_algorithm=custom_init_centres,
init=custom_init_centres,
distance="euclidean",
n_clusters=num_clusters,
)
Expand Down
14 changes: 7 additions & 7 deletions aeon/clustering/tests/test_k_medoids.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _pam_uni_medoids(X_train, y_train, X_test, y_test):
random_state=1,
n_init=2,
max_iter=5,
init_algorithm="first",
init="first",
distance="euclidean",
method="pam",
)
Expand All @@ -70,7 +70,7 @@ def _alternate_uni_medoids(X_train, y_train, X_test, y_test):
n_init=2,
max_iter=5,
method="alternate",
init_algorithm="first",
init="first",
distance="euclidean",
)
train_medoids_result = kmedoids.fit_predict(X_train)
Expand All @@ -95,7 +95,7 @@ def _pam_multi_medoids(X_train, y_train, X_test, y_test):
random_state=1,
n_init=2,
max_iter=5,
init_algorithm="first",
init="first",
distance="euclidean",
method="pam",
)
Expand All @@ -121,7 +121,7 @@ def _alternate_multi_medoids(X_train, y_train, X_test, y_test):
random_state=1,
n_init=2,
max_iter=5,
init_algorithm="first",
init="first",
method="alternate",
distance="euclidean",
)
Expand Down Expand Up @@ -169,7 +169,7 @@ def test_medoids_init():
random_state=1,
n_init=1,
max_iter=5,
init_algorithm="first",
init="first",
distance="euclidean",
n_clusters=num_clusters,
)
Expand All @@ -194,7 +194,7 @@ def test_medoids_init():
random_state=1,
n_init=1,
max_iter=5,
init_algorithm=custom_init_centres,
init=custom_init_centres,
distance="euclidean",
n_clusters=num_clusters,
)
Expand All @@ -209,7 +209,7 @@ def _get_model_centres(data, distance, method="pam", distance_params=None):
method=method,
n_init=2,
n_clusters=2,
init_algorithm="random",
init="random",
distance=distance,
distance_params=distance_params,
)
Expand Down
Loading