1
1
"""Base class for clustering."""
2
2
3
- from typing import Optional
4
-
5
3
__maintainer__ = []
6
4
__all__ = ["BaseClusterer" ]
7
5
@@ -27,8 +25,7 @@ class BaseClusterer(BaseCollectionEstimator):
27
25
}
28
26
29
27
@abstractmethod
30
- def __init__ (self , n_clusters : Optional [int ] = None ):
31
- self .n_clusters = n_clusters
28
+ def __init__ (self ):
32
29
# required for compatibility with some sklearn interfaces e.g.
33
30
# CalibratedClassifierCV
34
31
self ._estimator_type = "clusterer"
@@ -121,6 +118,7 @@ def predict_proba(self, X) -> np.ndarray:
121
118
self ._check_shape (X )
122
119
return self ._predict_proba (X )
123
120
121
+ @final
124
122
def fit_predict (self , X , y = None ) -> np .ndarray :
125
123
"""Compute cluster centers and predict cluster index for each time series.
126
124
@@ -139,11 +137,10 @@ def fit_predict(self, X, y=None) -> np.ndarray:
139
137
np.ndarray (1d array of shape (n_cases,))
140
138
Index of the cluster each time series in X belongs to.
141
139
"""
142
- self .fit (X )
143
- return self .predict (X )
140
+ return self ._fit_predict (X , y )
144
141
145
- def score (self , X , y = None ) -> float :
146
- """Score the quality of the clusterer .
142
+ def _fit_predict (self , X , y = None ) -> np . ndarray :
143
+ """Fit predict using base methods .
147
144
148
145
Parameters
149
146
----------
@@ -155,13 +152,11 @@ def score(self, X, y=None) -> float:
155
152
156
153
Returns
157
154
-------
158
- score : float
159
- Score of the clusterer .
155
+ np.ndarray (1d array of shape (n_cases,))
156
+ Index of the cluster each time series in X belongs to .
160
157
"""
161
- self ._check_is_fitted ()
162
- X = self ._preprocess_collection (X , store_metadata = False )
163
- self ._check_shape (X )
164
- return self ._score (X , y )
158
+ self .fit (X )
159
+ return self .labels_
165
160
166
161
def _predict_proba (self , X ) -> np .ndarray :
167
162
"""Predicts labels probabilities for sequences in X.
@@ -194,17 +189,17 @@ def _predict_proba(self, X) -> np.ndarray:
194
189
for i , u in enumerate (unique ):
195
190
preds [preds == u ] = i
196
191
n_cases = len (preds )
197
- n_clusters = self .n_clusters
192
+ if hasattr (self , "n_clusters" ):
193
+ n_clusters = self .n_clusters
194
+ else :
195
+ n_clusters = len (np .unique (preds ))
198
196
if n_clusters is None :
199
197
n_clusters = int (max (preds )) + 1
200
- dists = np .zeros ((X . shape [ 0 ] , n_clusters ))
198
+ dists = np .zeros ((len ( X ) , n_clusters ))
201
199
for i in range (n_cases ):
202
200
dists [i , preds [i ]] = 1
203
201
return dists
204
202
205
- @abstractmethod
206
- def _score (self , X , y = None ): ...
207
-
208
203
@abstractmethod
209
204
def _predict (self , X ) -> np .ndarray :
210
205
"""Predict the closest cluster each sample in X belongs to.
0 commit comments