@@ -5,12 +5,9 @@
 from sklearn.cluster import KMeans
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
-
+from aeon.transformations.collection.convolution_based import MiniRocket
 from aeon.clustering.base import BaseClusterer
 
-from numba import njit, prange
-
-
 class RCluster(BaseClusterer):
     """Time series R Clustering implementation.
 
@@ -20,7 +17,8 @@ class RCluster(BaseClusterer):
     ----------
     num_kernels : int, default=84
         The number of convolutional kernels used to transform the input time series
-        These kernels are fixed and pre-defined (not random) and are optimized for computational speed and
+        These kernels are fixed and pre-defined (not random) and are
+        optimized for computational speed and
         feature diversity
 
     max_dilations_per_kernel : int, default=32
@@ -35,14 +33,19 @@ class RCluster(BaseClusterer):
         The number of clusters used
 
     n_init : int, default=10
-        The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds
+        The number of times the clustering algorithm (e.g., KMeans) will
+        run with different centroid seeds
         to avoid poor local optima
 
     max_iter : int, default=300
         Maximum number of iterations of the k-means algorithm for a single
         run.
     random_state : int or np.random.RandomState instance or None, default=None
         Determines random number generation for centroid initialization.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for `transform`. ``-1``
+        means using all
+        processors.
 
     Notes
     -----
     Adapted from the implementation from source code
@@ -53,247 +56,28 @@ class RCluster(BaseClusterer):
     .. [1] Time series clustering with random convolutional kernels
        https://link.springer.com/article/10.1007/s10618-024-01018-x
     """
+
     def __init__(self,
-                 num_features=500,
                  num_kernels=84,
                  max_dilations_per_kernel=32,
                  n_clusters=8,
                  n_init=10,
                  random_state: Optional[Union[int, RandomState]] = None,
-                 max_iter=300):
-        self.num_features = num_features
-        self.num_kernels = num_kernels
-        self.max_dilations_per_kernel = max_dilations_per_kernel
+                 max_iter=300,
+                 n_jobs=-1):
         self.num_cluster = n_clusters
         self.n_init = n_init
         self.random_state = random_state
         self.max_iter = max_iter
-
+        self.mini_rocket = MiniRocket(n_kernels=num_kernels,
+                                      max_dilations_per_kernel=max_dilations_per_kernel,
+                                      n_jobs=n_jobs)
+        self._is_fitted = False  # private flag, so the inherited ``fit`` method is not shadowed
         super().__init__()
 
-    @staticmethod
-    @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True)
-    def __fit_biases(X, dilations, num_features_per_dilation, quantiles):
-
-        num_examples, input_length = X.shape
-
-        # equivalent to:
-        # >>> from itertools import combinations
-        # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32)
-        ###MODIFICATION
-        indices = np.array((
-            1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0,
-            1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4,
-            5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6,
-            5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2,
-            3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7,
-            8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7,
-            1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0,
-            2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3,
-            6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7,
-            0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3,
-            4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2,
-            4, 0, 5, 7, 1, 3, 8, 1, 7, 8
-        ), dtype=np.int32).reshape(84, 3)
-
-        num_kernels = len(indices)
-        num_dilations = len(dilations)
-
-        num_features = num_kernels * np.sum(num_features_per_dilation)
-
-        biases = np.zeros(num_features, dtype=np.float32)
-
-        feature_index_start = 0
-
-        for dilation_index in range(num_dilations):
-
-            dilation = dilations[dilation_index]
-            padding = ((9 - 1) * dilation) // 2
-
-            num_features_this_dilation = num_features_per_dilation[dilation_index]
-
-            for kernel_index in range(num_kernels):
-
-                feature_index_end = feature_index_start + num_features_this_dilation
-
-                _X = X[np.random.randint(num_examples)]
-
-                A = -_X  # A = alpha * X = -X
-                G = _X + _X + _X  # G = gamma * X = 3X
-
-                C_alpha = np.zeros(input_length, dtype=np.float32)
-                C_alpha[:] = A
-
-                C_gamma = np.zeros((9, input_length), dtype=np.float32)
-                C_gamma[9 // 2] = G
-
-                start = dilation
-                end = input_length - padding
-
-                for gamma_index in range(9 // 2):
-                    C_alpha[-end:] = C_alpha[-end:] + A[:end]
-                    C_gamma[gamma_index, -end:] = G[:end]
-
-                    end += dilation
-
-                for gamma_index in range(9 // 2 + 1, 9):
-                    C_alpha[:-start] = C_alpha[:-start] + A[start:]
-                    C_gamma[gamma_index, :-start] = G[start:]
-
-                    start += dilation
-
-                index_0, index_1, index_2 = indices[kernel_index]
-
-                C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2]
-
-                biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[
-                    feature_index_start:feature_index_end])
-
-                feature_index_start = feature_index_end
-
-        return biases
-
-    def __fit_dilations(self, input_length, num_features, max_dilations_per_kernel):
-
-        num_kernels = 84
-
-        num_features_per_kernel = num_features // num_kernels
-        true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel)
-        multiplier = num_features_per_kernel / true_max_dilations_per_kernel
-
-        max_exponent = np.log2((input_length - 1) / (9 - 1))
-        dilations, num_features_per_dilation = \
-            np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32),
-                      return_counts=True)
-        num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32)  # this is a vector
-
-        remainder = num_features_per_kernel - np.sum(num_features_per_dilation)
-        i = 0
-        while remainder > 0:
-            num_features_per_dilation[i] += 1
-            remainder -= 1
-            i = (i + 1) % len(num_features_per_dilation)
-
-        return dilations, num_features_per_dilation
-
-    def __quantiles(self, n):
-        return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32)
-
-    def __fit_rocket(self, X):
-
-        _, input_length = X.shape
-
-        dilations, num_features_per_dilation = self.__fit_dilations(input_length,
-                                                                    self.num_features,
-                                                                    self.max_dilations_per_kernel)
-
-        num_features_per_kernel = np.sum(num_features_per_dilation)
-
-        quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel)
-
-        ###MODIFICATION
-        quantiles = np.random.permutation(quantiles)
-
-        biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles)
-
-        return dilations, num_features_per_dilation, biases
-
-    def __transform(self, X, parameters):
-
-        num_examples, input_length = X.shape
-
-        dilations, num_features_per_dilation, biases = parameters
-
-        # equivalent to:
-        # >>> from itertools import combinations
-        # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32)
-        indices = np.array((
-            1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0,
-            1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4,
-            5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6,
-            5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2,
-            3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7,
-            8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7,
-            1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0,
-            2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3,
-            6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7,
-            0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3,
-            4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2,
-            4, 0, 5, 7, 1, 3, 8, 1, 7, 8
-        ), dtype=np.int32).reshape(84, 3)
-
-        num_kernels = len(indices)
-        num_dilations = len(dilations)
-
-        num_features = num_kernels * np.sum(num_features_per_dilation)
-
-        features = np.zeros((num_examples, num_features), dtype=np.float32)
-
-        for example_index in prange(num_examples):
-
-            _X = X[example_index]
-
-            A = -_X  # A = alpha * X = -X
-            G = _X + _X + _X  # G = gamma * X = 3X
-
-            feature_index_start = 0
-
-            for dilation_index in range(num_dilations):
-
-                _padding0 = dilation_index % 2
-
-                dilation = dilations[dilation_index]
-                padding = ((9 - 1) * dilation) // 2
-
-                num_features_this_dilation = num_features_per_dilation[dilation_index]
-
-                C_alpha = np.zeros(input_length, dtype=np.float32)
-                C_alpha[:] = A
-
-                C_gamma = np.zeros((9, input_length), dtype=np.float32)
-                C_gamma[9 // 2] = G
-
-                start = dilation
-                end = input_length - padding
-
-                for gamma_index in range(9 // 2):
-                    C_alpha[-end:] = C_alpha[-end:] + A[:end]
-                    C_gamma[gamma_index, -end:] = G[:end]
-
-                    end += dilation
-
-                for gamma_index in range(9 // 2 + 1, 9):
-                    C_alpha[:-start] = C_alpha[:-start] + A[start:]
-                    C_gamma[gamma_index, :-start] = G[start:]
-
-                    start += dilation
-
-                for kernel_index in range(num_kernels):
-
-                    feature_index_end = feature_index_start + num_features_this_dilation
-
-                    _padding1 = (_padding0 + kernel_index) % 2
-
-                    index_0, index_1, index_2 = indices[kernel_index]
-
-                    C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2]
-
-                    if _padding1 == 0:
-                        for feature_count in range(num_features_this_dilation):
-                            features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean()
-                    else:
-                        for feature_count in range(num_features_this_dilation):
-                            features[example_index, feature_index_start + feature_count] = ((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean()
-
-                    feature_index_start = feature_index_end
-
-        return features
-
-    def _fit(self,X,y=None):
-        parameters = self.__fit_rocket(X=X)
-        transformed_data = self.__transform(X=X, parameters=parameters)
+    def _fit(self, X, y=None):
+        self.mini_rocket.fit(X=X)
+        transformed_data = self.mini_rocket.transform(X=X)
 
         sc = StandardScaler()
         X_std = sc.fit_transform(transformed_data)
@@ -309,23 +93,28 @@ def _fit(self, X, y=None):
                                  n_clusters=self.num_cluster,
                                  n_init=self.n_init,
                                  random_state=self.random_state,
                                  max_iter=self.max_iter)
         self._r_cluster.fit(transformed_data_pca)
+        self._is_fitted = True
 
     def _predict(self, X, y=None) -> np.ndarray:
+        if not self._is_fitted:
+            raise ValueError("The model has not been fitted; call `fit` before `predict`.")
+
+        self.mini_rocket.fit(X=X)
+        transformed_data = self.mini_rocket.transform(X=X)
 
-        parameters = self.__fit_rocket(X=X)
-        transformed_data = self.__transform(X=X, parameters=parameters)
         sc = StandardScaler()
         X_std = sc.fit_transform(transformed_data)
 
         pca_optimal = PCA(n_components=self.optimal_dimensions)
         transformed_data_pca = pca_optimal.fit_transform(X_std)
 
         return self._r_cluster.predict(transformed_data_pca)
+
     def _fit_predict(self, X, y=None) -> np.ndarray:
-        parameters = self.__fit_rocket(X=X)
-        transformed_data = self.__transform(X=X, parameters=parameters)
+        self.mini_rocket.fit(X=X)
+        transformed_data = self.mini_rocket.transform(X=X)
 
         sc = StandardScaler()
         X_std = sc.fit_transform(transformed_data)
@@ -342,4 +131,4 @@ def _fit_predict(self, X, y=None) -> np.ndarray:
                                  n_init=self.n_init,
                                  random_state=self.random_state,
                                  max_iter=self.max_iter)
-        return self._r_cluster.fit_predict(transformed_data_pca)
\ No newline at end of file
+        return self._r_cluster.fit_predict(transformed_data_pca)
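
For reviewers who want to sanity-check the refactor outside the class: the new `_fit` path is simply aeon's `MiniRocket` transform followed by the scale/PCA/KMeans steps already in the file. Below is a minimal standalone sketch, assuming `aeon` and `scikit-learn` are installed; the hard-coded `n_components=5` and `n_clusters=3` are illustrative placeholders, since the class computes `optimal_dimensions` internally in code that sits outside this diff.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from aeon.transformations.collection.convolution_based import MiniRocket

# Toy collection: 20 univariate series of length 100,
# shaped (n_cases, n_channels, n_timepoints) as aeon expects.
rng = np.random.default_rng(0)
X = rng.normal(size=(20, 1, 100)).astype(np.float32)

# Same configuration the refactored __init__ passes to MiniRocket.
mini_rocket = MiniRocket(n_kernels=84, max_dilations_per_kernel=32, n_jobs=-1)
features = mini_rocket.fit_transform(X)

# Mirror _fit: standardise the PPV features, reduce with PCA, then cluster.
X_std = StandardScaler().fit_transform(features)
X_pca = PCA(n_components=5).fit_transform(X_std)
labels = KMeans(n_clusters=3, n_init=10, random_state=0, max_iter=300).fit_predict(X_pca)
print(labels)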
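
End to end, the public clustering API is unchanged by the refactor. A usage sketch follows; the import path is hypothetical, so adjust it to wherever `RCluster` actually lives in the package.

import numpy as np

from aeon.clustering._r_cluster import RCluster  # hypothetical path, for illustration only

# 30 univariate series of length 150, shaped (n_cases, n_channels, n_timepoints)
rng = np.random.default_rng(42)
X = rng.normal(size=(30, 1, 150)).astype(np.float32)

clusterer = RCluster(n_clusters=4, n_init=10, random_state=42)
labels = clusterer.fit_predict(X)  # dispatches to _fit_predict under the hood
print(labels.shape)  # (30,)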