Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Sort out clustering base class #2251

Merged
merged 32 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
2306ebd
remove y from predict
TonyBagnall Oct 25, 2024
0035e49
Merge branch 'main' into ajb/base_clst
TonyBagnall Oct 25, 2024
2c4fd41
Merge branch 'main' into ajb/base_clst
TonyBagnall Oct 31, 2024
04c385b
remove score
TonyBagnall Oct 31, 2024
a346080
remove score, add fit_predict
TonyBagnall Oct 31, 2024
60615dd
remove score, add fit_predict
TonyBagnall Oct 31, 2024
aad00ef
removed score and n_clusters from base class
chrisholder Nov 1, 2024
bed81be
merged main
chrisholder Nov 4, 2024
24732d2
fit_predict
chrisholder Nov 4, 2024
e83387c
fixed predict proba
chrisholder Nov 6, 2024
30fbda9
fixed dnn tests
chrisholder Nov 6, 2024
3557ac4
fixed dnn tests
chrisholder Nov 6, 2024
2a5807d
fix notebook
chrisholder Nov 6, 2024
42e5a16
merge main
chrisholder Nov 6, 2024
3f80902
fixed
chrisholder Nov 6, 2024
63ed72d
Merge branch 'main' into ajb/base_clst
TonyBagnall Nov 12, 2024
4c2475f
Merge branch 'main' into ajb/base_clst
chrisholder Nov 15, 2024
a591c31
removed deep learner n_clusters and assert labels_ exists
chrisholder Nov 15, 2024
2ee81ba
cont
chrisholder Nov 17, 2024
ebec4e6
fix dnns
chrisholder Nov 19, 2024
692849e
merge
chrisholder Nov 19, 2024
02fcee0
pipeline clusterer
chrisholder Nov 19, 2024
9104808
fix pipeline
chrisholder Nov 19, 2024
1c85487
revert
chrisholder Nov 19, 2024
c89d854
fix
chrisholder Nov 20, 2024
4c1e6d0
remove score from dnns
chrisholder Nov 20, 2024
6d68492
remove score from notebooks
chrisholder Nov 20, 2024
a56b4d4
remove score from remaining
chrisholder Nov 20, 2024
f6b0ffb
fix notebook
chrisholder Nov 20, 2024
c61c3c9
fixed kmeans bug stopping tests working
chrisholder Nov 20, 2024
5cc63cf
fixed
chrisholder Nov 20, 2024
f8660d0
docstring fix
chrisholder Nov 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions aeon/clustering/_clara.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def __init__(
self.distance_params = distance_params
self.n_samples = n_samples
self.n_sampling_iters = n_sampling_iters
self.n_clusters = n_clusters

self.cluster_centers_ = None
self.labels_ = None
Expand All @@ -148,7 +149,7 @@ def __init__(
self._random_state = None
self._kmedoids_instance = None

super().__init__(n_clusters)
super().__init__()

def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
return self._kmedoids_instance.predict(X)
Expand Down Expand Up @@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None):
self.n_iter_ = best_pam.n_iter_
self._kmedoids_instance = best_pam

def _score(self, X, y=None):
return -self.inertia_

@classmethod
def _get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Expand Down
6 changes: 2 additions & 4 deletions aeon/clustering/_elastic_som.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def __init__(
self.init = init
self.sigma_decay_function = sigma_decay_function
self.custom_alignment_path = custom_alignment_path
self.n_clusters = n_clusters

self._random_state = None
self._alignment_path_callable = None
Expand All @@ -191,7 +192,7 @@ def __init__(

self.labels_ = None
self.cluster_centers_ = None
super().__init__(n_clusters=n_clusters)
super().__init__()

def _fit(self, X, y=None):
self._check_params(X)
Expand Down Expand Up @@ -219,9 +220,6 @@ def _fit(self, X, y=None):
def _predict(self, X, y=None):
return self._find_bmu(X, self.cluster_centers_)

def _score(self, X, y=None):
raise NotImplementedError("TimeSeriesSOM does not support scoring")

def _find_bmu(self, x, weights):
pairwise_matrix = pairwise_distance(
x,
Expand Down
6 changes: 2 additions & 4 deletions aeon/clustering/_k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def __init__(
self.distance_params = distance_params
self.average_params = average_params
self.averaging_method = averaging_method
self.n_clusters = n_clusters

self.cluster_centers_ = None
self.labels_ = None
Expand All @@ -203,7 +204,7 @@ def __init__(
self._averaging_method = None
self._average_params = None

super().__init__(n_clusters)
super().__init__()

def _fit(self, X: np.ndarray, y=None):
self._check_params(X)
Expand Down Expand Up @@ -281,9 +282,6 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:

return prev_labels, cluster_centres, prev_inertia, i + 1

def _score(self, X, y=None):
return -self.inertia_

def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
if isinstance(self.distance, str):
pairwise_matrix = pairwise_distance(
Expand Down
6 changes: 2 additions & 4 deletions aeon/clustering/_k_medoids.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def __init__(
self.random_state = random_state
self.distance_params = distance_params
self.method = method
self.n_clusters = n_clusters

self.cluster_centers_ = None
self.labels_ = None
Expand All @@ -184,7 +185,7 @@ def __init__(
self._fit_method = None

self._distance_params = {}
super().__init__(n_clusters)
super().__init__()

def _fit(self, X: np.ndarray, y=None):
self._check_params(X)
Expand All @@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None):
self.cluster_centers_ = best_centers
self.n_iter_ = best_iters

def _score(self, X, y=None):
return -self.inertia_

def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
if isinstance(self.distance, str):
pairwise_matrix = pairwise_distance(
Expand Down
6 changes: 2 additions & 4 deletions aeon/clustering/_k_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(
self.tol = tol
self.verbose = verbose
self.random_state = random_state
self.n_clusters = n_clusters

self.cluster_centers_ = None
self.labels_ = None
Expand All @@ -97,7 +98,7 @@ def __init__(

self._tslearn_k_shapes = None

super().__init__(n_clusters=n_clusters)
super().__init__()

def _fit(self, X, y=None):
"""Fit time series clusterer to training data.
Expand Down Expand Up @@ -179,6 +180,3 @@ def _get_test_params(cls, parameter_set="default"):
"verbose": False,
"random_state": 1,
}

def _score(self, X, y=None):
return np.abs(self.inertia_)
6 changes: 2 additions & 4 deletions aeon/clustering/_k_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def __init__(
self.tol = tol
self.verbose = verbose
self.random_state = random_state
self.n_clusters = n_clusters

self.cluster_centers_ = None
self.labels_ = None
Expand All @@ -98,7 +99,7 @@ def __init__(

self._tslearn_k_shapes = None

super().__init__(n_clusters=n_clusters)
super().__init__()

def _fit(self, X, y=None):
"""Fit time series clusterer to training data.
Expand Down Expand Up @@ -180,6 +181,3 @@ def _get_test_params(cls, parameter_set="default"):
"verbose": False,
"random_state": 1,
}

def _score(self, X, y=None):
return np.abs(self.inertia_)
6 changes: 2 additions & 4 deletions aeon/clustering/_kernel_k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def __init__(
self.verbose = verbose
self.n_jobs = n_jobs
self.random_state = random_state
self.n_clusters = n_clusters

self.cluster_centers_ = None
self.labels_ = None
Expand All @@ -116,7 +117,7 @@ def __init__(

self._tslearn_kernel_k_means = None

super().__init__(n_clusters=n_clusters)
super().__init__()

def _fit(self, X, y=None):
"""Fit time series clusterer to training data.
Expand Down Expand Up @@ -204,6 +205,3 @@ def _get_test_params(cls, parameter_set="default") -> dict:
"n_jobs": 1,
"random_state": 1,
}

def _score(self, X, y=None) -> float:
return np.abs(self.inertia_)
33 changes: 14 additions & 19 deletions aeon/clustering/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Base class for clustering."""

from typing import Optional

__maintainer__ = []
__all__ = ["BaseClusterer"]

Expand All @@ -28,8 +26,7 @@ class BaseClusterer(BaseCollectionEstimator):
"fit_is_empty": False,
}

def __init__(self, n_clusters: Optional[int] = None):
self.n_clusters = n_clusters
def __init__(self):
# required for compatibility with some sklearn interfaces e.g.
# CalibratedClassifierCV
self._estimator_type = "clusterer"
Expand Down Expand Up @@ -125,6 +122,7 @@ def predict_proba(self, X) -> np.ndarray:
self._check_shape(X)
return self._predict_proba(X)

@final
def fit_predict(self, X, y=None) -> np.ndarray:
"""Compute cluster centers and predict cluster index for each time series.

Expand All @@ -143,11 +141,10 @@ def fit_predict(self, X, y=None) -> np.ndarray:
np.ndarray (1d array of shape (n_cases,))
Index of the cluster each time series in X belongs to.
"""
self.fit(X)
return self.predict(X)
return self._fit_predict(X, y)

def score(self, X, y=None) -> float:
"""Score the quality of the clusterer.
def _fit_predict(self, X, y=None) -> np.ndarray:
"""Fit predict using base methods.

Parameters
----------
Expand All @@ -159,13 +156,11 @@ def score(self, X, y=None) -> float:

Returns
-------
score : float
Score of the clusterer.
np.ndarray (1d array of shape (n_cases,))
Index of the cluster each time series in X belongs to.
"""
self._check_is_fitted()
X = self._preprocess_collection(X, store_metadata=False)
self._check_shape(X)
return self._score(X, y)
self.fit(X)
return self.labels_

def _predict_proba(self, X) -> np.ndarray:
"""Predict label probabilities for sequences in X.
Expand Down Expand Up @@ -198,17 +193,17 @@ def _predict_proba(self, X) -> np.ndarray:
for i, u in enumerate(unique):
preds[preds == u] = i
n_cases = len(preds)
n_clusters = self.n_clusters
if hasattr(self, "n_clusters"):
n_clusters = self.n_clusters
else:
n_clusters = len(np.unique(preds))
chrisholder marked this conversation as resolved.
Show resolved Hide resolved
if n_clusters is None:
n_clusters = int(max(preds)) + 1
dists = np.zeros((X.shape[0], n_clusters))
dists = np.zeros((len(X), n_clusters))
for i in range(n_cases):
dists[i, preds[i]] = 1
return dists

@abstractmethod
def _score(self, X, y=None): ...

@abstractmethod
def _predict(self, X) -> np.ndarray:
"""Predict the closest cluster each sample in X belongs to.
Expand Down
7 changes: 3 additions & 4 deletions aeon/clustering/compose/_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,9 @@ def __init__(self, transformers, clusterer, random_state=None):
)

def _fit(self, X, y=None):
return super()._fit(X, y)

def _score(self, X, y=None):
raise NotImplementedError("Pipeline does not support scoring.")
super()._fit(X, y)
self.labels_ = self.steps_[-1][1].labels_
return self

@classmethod
def _get_test_params(cls, parameter_set="default"):
Expand Down
10 changes: 0 additions & 10 deletions aeon/clustering/deep_learning/_ae_abgru.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer):

Parameters
----------
n_clusters : int, default=None
Number of clusters for the deep learning model.
clustering_algorithm : str, default="deprecated"
Use 'estimator' parameter instead.
clustering_params : dict, default=None
Expand Down Expand Up @@ -106,7 +104,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer):

def __init__(
self,
n_clusters=None,
estimator=None,
clustering_algorithm="deprecated",
clustering_params=None,
Expand Down Expand Up @@ -153,7 +150,6 @@ def __init__(
self.random_state = random_state

super().__init__(
n_clusters=n_clusters,
clustering_algorithm=clustering_algorithm,
clustering_params=clustering_params,
estimator=estimator,
Expand Down Expand Up @@ -302,12 +298,6 @@ def _fit(self, X):

return self

def _score(self, X, y=None):
# Transpose to conform to Keras input style.
X = X.transpose(0, 2, 1)
latent_space = self.model_.layers[1].predict(X)
return self._estimator.score(latent_space)

@classmethod
def _get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Expand Down
10 changes: 0 additions & 10 deletions aeon/clustering/deep_learning/_ae_bgru.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ class AEBiGRUClusterer(BaseDeepClusterer):

Parameters
----------
n_clusters : int, default=None
Number of clusters for the deep learning model.
clustering_algorithm : str, default="deprecated"
Use 'estimator' parameter instead.
clustering_params : dict, default=None
Expand Down Expand Up @@ -105,7 +103,6 @@ class AEBiGRUClusterer(BaseDeepClusterer):

def __init__(
self,
n_clusters=None,
clustering_algorithm="deprecated",
estimator=None,
clustering_params=None,
Expand Down Expand Up @@ -152,7 +149,6 @@ def __init__(
self.random_state = random_state

super().__init__(
n_clusters=n_clusters,
clustering_algorithm=clustering_algorithm,
clustering_params=clustering_params,
estimator=estimator,
Expand Down Expand Up @@ -300,12 +296,6 @@ def _fit(self, X):

return self

def _score(self, X, y=None):
# Transpose to conform to Keras input style.
X = X.transpose(0, 2, 1)
latent_space = self.model_.layers[1].predict(X)
return self._estimator.score(latent_space)

@classmethod
def _get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Expand Down
10 changes: 0 additions & 10 deletions aeon/clustering/deep_learning/_ae_dcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ class AEDCNNClusterer(BaseDeepClusterer):

Parameters
----------
n_clusters : int, default=None
Number of clusters for the deep learning model.
clustering_algorithm : str, default="deprecated"
Use 'estimator' parameter instead.
clustering_params : dict, default=None
Expand Down Expand Up @@ -119,7 +117,6 @@ class AEDCNNClusterer(BaseDeepClusterer):

def __init__(
self,
n_clusters=None,
estimator=None,
clustering_algorithm="deprecated",
clustering_params=None,
Expand Down Expand Up @@ -174,7 +171,6 @@ def __init__(
self.random_state = random_state

super().__init__(
n_clusters=n_clusters,
clustering_params=clustering_params,
clustering_algorithm=clustering_algorithm,
estimator=estimator,
Expand Down Expand Up @@ -326,12 +322,6 @@ def _fit(self, X):

return self

def _score(self, X, y=None):
# Transpose to conform to Keras input style.
X = X.transpose(0, 2, 1)
latent_space = self.model_.layers[1].predict(X)
return self._estimator.score(latent_space)

@classmethod
def _get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Expand Down
Loading