Skip to content

Commit 7a5af78

Browse files
Merge remote-tracking branch 'origin/main' into mm/estimator-bases
# Conflicts: # aeon/clustering/base.py
2 parents cbd1992 + 980e8bb commit 7a5af78

30 files changed

+131
-368
lines changed

README.md

+39-31
Large diffs are not rendered by default.

aeon/clustering/_clara.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ def __init__(
139139
self.distance_params = distance_params
140140
self.n_samples = n_samples
141141
self.n_sampling_iters = n_sampling_iters
142+
self.n_clusters = n_clusters
142143

143144
self.cluster_centers_ = None
144145
self.labels_ = None
@@ -148,7 +149,7 @@ def __init__(
148149
self._random_state = None
149150
self._kmedoids_instance = None
150151

151-
super().__init__(n_clusters)
152+
super().__init__()
152153

153154
def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
154155
return self._kmedoids_instance.predict(X)
@@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None):
207208
self.n_iter_ = best_pam.n_iter_
208209
self._kmedoids_instance = best_pam
209210

210-
def _score(self, X, y=None):
211-
return -self.inertia_
212-
213211
@classmethod
214212
def _get_test_params(cls, parameter_set="default"):
215213
"""Return testing parameter settings for the estimator.

aeon/clustering/_elastic_som.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ def __init__(
179179
self.init = init
180180
self.sigma_decay_function = sigma_decay_function
181181
self.custom_alignment_path = custom_alignment_path
182+
self.n_clusters = n_clusters
182183

183184
self._random_state = None
184185
self._alignment_path_callable = None
@@ -191,7 +192,7 @@ def __init__(
191192

192193
self.labels_ = None
193194
self.cluster_centers_ = None
194-
super().__init__(n_clusters=n_clusters)
195+
super().__init__()
195196

196197
def _fit(self, X, y=None):
197198
self._check_params(X)
@@ -219,9 +220,6 @@ def _fit(self, X, y=None):
219220
def _predict(self, X, y=None):
220221
return self._find_bmu(X, self.cluster_centers_)
221222

222-
def _score(self, X, y=None):
223-
raise NotImplementedError("TimeSeriesSOM does not support scoring")
224-
225223
def _find_bmu(self, x, weights):
226224
pairwise_matrix = pairwise_distance(
227225
x,

aeon/clustering/_k_means.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ def __init__(
192192
self.distance_params = distance_params
193193
self.average_params = average_params
194194
self.averaging_method = averaging_method
195+
self.n_clusters = n_clusters
195196

196197
self.cluster_centers_ = None
197198
self.labels_ = None
@@ -203,7 +204,7 @@ def __init__(
203204
self._averaging_method = None
204205
self._average_params = None
205206

206-
super().__init__(n_clusters)
207+
super().__init__()
207208

208209
def _fit(self, X: np.ndarray, y=None):
209210
self._check_params(X)
@@ -267,7 +268,7 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:
267268
prev_inertia = curr_inertia
268269
prev_labels = curr_labels
269270

270-
if change_in_centres < self.tol:
271+
if change_in_centres < self.tol or (i + 1) == self.max_iter:
271272
break
272273

273274
# Compute new cluster centres
@@ -281,9 +282,6 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:
281282

282283
return prev_labels, cluster_centres, prev_inertia, i + 1
283284

284-
def _score(self, X, y=None):
285-
return -self.inertia_
286-
287285
def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
288286
if isinstance(self.distance, str):
289287
pairwise_matrix = pairwise_distance(

aeon/clustering/_k_medoids.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def __init__(
171171
self.random_state = random_state
172172
self.distance_params = distance_params
173173
self.method = method
174+
self.n_clusters = n_clusters
174175

175176
self.cluster_centers_ = None
176177
self.labels_ = None
@@ -184,7 +185,7 @@ def __init__(
184185
self._fit_method = None
185186

186187
self._distance_params = {}
187-
super().__init__(n_clusters)
188+
super().__init__()
188189

189190
def _fit(self, X: np.ndarray, y=None):
190191
self._check_params(X)
@@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None):
207208
self.cluster_centers_ = best_centers
208209
self.n_iter_ = best_iters
209210

210-
def _score(self, X, y=None):
211-
return -self.inertia_
212-
213211
def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
214212
if isinstance(self.distance, str):
215213
pairwise_matrix = pairwise_distance(

aeon/clustering/_k_shape.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def __init__(
8989
self.tol = tol
9090
self.verbose = verbose
9191
self.random_state = random_state
92+
self.n_clusters = n_clusters
9293

9394
self.cluster_centers_ = None
9495
self.labels_ = None
@@ -97,7 +98,7 @@ def __init__(
9798

9899
self._tslearn_k_shapes = None
99100

100-
super().__init__(n_clusters=n_clusters)
101+
super().__init__()
101102

102103
def _fit(self, X, y=None):
103104
"""Fit time series clusterer to training data.
@@ -130,7 +131,7 @@ def _fit(self, X, y=None):
130131

131132
self._tslearn_k_shapes.fit(_X)
132133
self._cluster_centers = self._tslearn_k_shapes.cluster_centers_
133-
self.labels_ = self._tslearn_k_shapes.labels_
134+
self.labels_ = self._tslearn_k_shapes.predict(_X)
134135
self.inertia_ = self._tslearn_k_shapes.inertia_
135136
self.n_iter_ = self._tslearn_k_shapes.n_iter_
136137

@@ -179,6 +180,3 @@ def _get_test_params(cls, parameter_set="default"):
179180
"verbose": False,
180181
"random_state": 1,
181182
}
182-
183-
def _score(self, X, y=None):
184-
return np.abs(self.inertia_)

aeon/clustering/_k_shapes.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def __init__(
9090
self.tol = tol
9191
self.verbose = verbose
9292
self.random_state = random_state
93+
self.n_clusters = n_clusters
9394

9495
self.cluster_centers_ = None
9596
self.labels_ = None
@@ -98,7 +99,7 @@ def __init__(
9899

99100
self._tslearn_k_shapes = None
100101

101-
super().__init__(n_clusters=n_clusters)
102+
super().__init__()
102103

103104
def _fit(self, X, y=None):
104105
"""Fit time series clusterer to training data.
@@ -131,7 +132,7 @@ def _fit(self, X, y=None):
131132

132133
self._tslearn_k_shapes.fit(_X)
133134
self._cluster_centers = self._tslearn_k_shapes.cluster_centers_
134-
self.labels_ = self._tslearn_k_shapes.labels_
135+
self.labels_ = self._tslearn_k_shapes.predict(_X)
135136
self.inertia_ = self._tslearn_k_shapes.inertia_
136137
self.n_iter_ = self._tslearn_k_shapes.n_iter_
137138

@@ -180,6 +181,3 @@ def _get_test_params(cls, parameter_set="default"):
180181
"verbose": False,
181182
"random_state": 1,
182183
}
183-
184-
def _score(self, X, y=None):
185-
return np.abs(self.inertia_)

aeon/clustering/_kernel_k_means.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def __init__(
108108
self.verbose = verbose
109109
self.n_jobs = n_jobs
110110
self.random_state = random_state
111+
self.n_clusters = n_clusters
111112

112113
self.cluster_centers_ = None
113114
self.labels_ = None
@@ -116,7 +117,7 @@ def __init__(
116117

117118
self._tslearn_kernel_k_means = None
118119

119-
super().__init__(n_clusters=n_clusters)
120+
super().__init__()
120121

121122
def _fit(self, X, y=None):
122123
"""Fit time series clusterer to training data.
@@ -204,6 +205,3 @@ def _get_test_params(cls, parameter_set="default") -> dict:
204205
"n_jobs": 1,
205206
"random_state": 1,
206207
}
207-
208-
def _score(self, X, y=None) -> float:
209-
return np.abs(self.inertia_)

aeon/clustering/base.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
"""Base class for clustering."""
22

3-
from typing import Optional
4-
53
__maintainer__ = []
64
__all__ = ["BaseClusterer"]
75

@@ -27,8 +25,7 @@ class BaseClusterer(BaseCollectionEstimator):
2725
}
2826

2927
@abstractmethod
30-
def __init__(self, n_clusters: Optional[int] = None):
31-
self.n_clusters = n_clusters
28+
def __init__(self):
3229
# required for compatibility with some sklearn interfaces e.g.
3330
# CalibratedClassifierCV
3431
self._estimator_type = "clusterer"
@@ -121,6 +118,7 @@ def predict_proba(self, X) -> np.ndarray:
121118
self._check_shape(X)
122119
return self._predict_proba(X)
123120

121+
@final
124122
def fit_predict(self, X, y=None) -> np.ndarray:
125123
"""Compute cluster centers and predict cluster index for each time series.
126124
@@ -139,11 +137,10 @@ def fit_predict(self, X, y=None) -> np.ndarray:
139137
np.ndarray (1d array of shape (n_cases,))
140138
Index of the cluster each time series in X belongs to.
141139
"""
142-
self.fit(X)
143-
return self.predict(X)
140+
return self._fit_predict(X, y)
144141

145-
def score(self, X, y=None) -> float:
146-
"""Score the quality of the clusterer.
142+
def _fit_predict(self, X, y=None) -> np.ndarray:
143+
"""Fit predict using base methods.
147144
148145
Parameters
149146
----------
@@ -155,13 +152,11 @@ def score(self, X, y=None) -> float:
155152
156153
Returns
157154
-------
158-
score : float
159-
Score of the clusterer.
155+
np.ndarray (1d array of shape (n_cases,))
156+
Index of the cluster each time series in X belongs to.
160157
"""
161-
self._check_is_fitted()
162-
X = self._preprocess_collection(X, store_metadata=False)
163-
self._check_shape(X)
164-
return self._score(X, y)
158+
self.fit(X)
159+
return self.labels_
165160

166161
def _predict_proba(self, X) -> np.ndarray:
167162
"""Predicts labels probabilities for sequences in X.
@@ -194,17 +189,17 @@ def _predict_proba(self, X) -> np.ndarray:
194189
for i, u in enumerate(unique):
195190
preds[preds == u] = i
196191
n_cases = len(preds)
197-
n_clusters = self.n_clusters
192+
if hasattr(self, "n_clusters"):
193+
n_clusters = self.n_clusters
194+
else:
195+
n_clusters = len(np.unique(preds))
198196
if n_clusters is None:
199197
n_clusters = int(max(preds)) + 1
200-
dists = np.zeros((X.shape[0], n_clusters))
198+
dists = np.zeros((len(X), n_clusters))
201199
for i in range(n_cases):
202200
dists[i, preds[i]] = 1
203201
return dists
204202

205-
@abstractmethod
206-
def _score(self, X, y=None): ...
207-
208203
@abstractmethod
209204
def _predict(self, X) -> np.ndarray:
210205
"""Predict the closest cluster each sample in X belongs to.

aeon/clustering/compose/_pipeline.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,9 @@ def __init__(self, transformers, clusterer, random_state=None):
8686
)
8787

8888
def _fit(self, X, y=None):
89-
return super()._fit(X, y)
90-
91-
def _score(self, X, y=None):
92-
raise NotImplementedError("Pipeline does not support scoring.")
89+
super()._fit(X, y)
90+
self.labels_ = self.steps_[-1][1].labels_
91+
return self
9392

9493
@classmethod
9594
def _get_test_params(cls, parameter_set="default"):

aeon/clustering/deep_learning/_ae_abgru.py

-10
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer):
2020
2121
Parameters
2222
----------
23-
n_clusters : int, default=None
24-
Number of clusters for the deep learning model.
2523
clustering_algorithm : str, default="deprecated"
2624
Use 'estimator' parameter instead.
2725
clustering_params : dict, default=None
@@ -106,7 +104,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer):
106104

107105
def __init__(
108106
self,
109-
n_clusters=None,
110107
estimator=None,
111108
clustering_algorithm="deprecated",
112109
clustering_params=None,
@@ -153,7 +150,6 @@ def __init__(
153150
self.random_state = random_state
154151

155152
super().__init__(
156-
n_clusters=n_clusters,
157153
clustering_algorithm=clustering_algorithm,
158154
clustering_params=clustering_params,
159155
estimator=estimator,
@@ -302,12 +298,6 @@ def _fit(self, X):
302298

303299
return self
304300

305-
def _score(self, X, y=None):
306-
# Transpose to conform to Keras input style.
307-
X = X.transpose(0, 2, 1)
308-
latent_space = self.model_.layers[1].predict(X)
309-
return self._estimator.score(latent_space)
310-
311301
@classmethod
312302
def _get_test_params(cls, parameter_set="default"):
313303
"""Return testing parameter settings for the estimator.

aeon/clustering/deep_learning/_ae_bgru.py

-10
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ class AEBiGRUClusterer(BaseDeepClusterer):
2020
2121
Parameters
2222
----------
23-
n_clusters : int, default=None
24-
Number of clusters for the deep learning model.
2523
clustering_algorithm : str, default="deprecated"
2624
Use 'estimator' parameter instead.
2725
clustering_params : dict, default=None
@@ -105,7 +103,6 @@ class AEBiGRUClusterer(BaseDeepClusterer):
105103

106104
def __init__(
107105
self,
108-
n_clusters=None,
109106
clustering_algorithm="deprecated",
110107
estimator=None,
111108
clustering_params=None,
@@ -152,7 +149,6 @@ def __init__(
152149
self.random_state = random_state
153150

154151
super().__init__(
155-
n_clusters=n_clusters,
156152
clustering_algorithm=clustering_algorithm,
157153
clustering_params=clustering_params,
158154
estimator=estimator,
@@ -300,12 +296,6 @@ def _fit(self, X):
300296

301297
return self
302298

303-
def _score(self, X, y=None):
304-
# Transpose to conform to Keras input style.
305-
X = X.transpose(0, 2, 1)
306-
latent_space = self.model_.layers[1].predict(X)
307-
return self._estimator.score(latent_space)
308-
309299
@classmethod
310300
def _get_test_params(cls, parameter_set="default"):
311301
"""Return testing parameter settings for the estimator.

0 commit comments

Comments
 (0)