Commit 36a8e27

committed
used aeon mini rocket
1 parent 472e501 commit 36a8e27
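
This commit replaces RCluster's vendored, numba-compiled MiniRocket routines (`__fit_biases`, `__fit_dilations`, `__quantiles`, `__fit_rocket`, `__transform`) with aeon's `MiniRocket` transformer. It drops the `numba` dependency and the `num_features` parameter, adds an `n_jobs` parameter that is forwarded to `MiniRocket`, and adds a fitted-state flag that `_predict` checks before use.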

File tree

1 file changed: +30 −241 lines changed

aeon/clustering/_r_cluster.py (+30 −241)
@@ -5,12 +5,9 @@
 from sklearn.cluster import KMeans
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
-
+from aeon.transformations.collection.convolution_based import MiniRocket
 from aeon.clustering.base import BaseClusterer
 
-from numba import njit, prange
-
-
 class RCluster(BaseClusterer):
     """Time series R Clustering implementation .
 
@@ -20,7 +17,8 @@ class RCluster(BaseClusterer):
     ----------
     num_kernels : int , default = 84
         The number of convolutional kernels used to transform the input time series
-        These kernels are fixed and pre-defined (not random) and are optimized for computational speed and
+        These kernels are fixed and pre-defined (not random) and are
+        optimized for computational speed and
         feature diversity
 
     max_dilations_per_kernel : int , default = 32
@@ -35,14 +33,19 @@ class RCluster(BaseClusterer):
         The number of clusters used
 
     n_init : int , default = 10
-        The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds
+        The number of times the clustering algorithm (e.g., KMeans) will
+        run with different centroid seeds
         to avoid poor local optima
 
     max_iter: int, default=300
         Maximum number of iterations of the k-means algorithm for a single
         run.
     random_state: int or np.random.RandomState instance or None, default=None
         Determines random number generation for centroid initialization.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for `transform`. ``-1``
+        means using all
+        processors.
     Notes
     -----
     Adapted from the implementation from source code
@@ -53,247 +56,28 @@ class RCluster(BaseClusterer):
     .. [1] Time series clustering with random convolutional kernels
         https://link.springer.com/article/10.1007/s10618-024-01018-x
     """
+
     def __init__(self,
-                 num_features=500,
                  num_kernels=84,
                  max_dilations_per_kernel=32,
                  n_clusters=8,
                  n_init=10,
                  random_state: Optional[Union[int, RandomState]] = None,
-                 max_iter=300):
-        self.num_features = num_features
-        self.num_kernels = num_kernels
-        self.max_dilations_per_kernel = max_dilations_per_kernel
+                 max_iter=300,
+                 n_jobs=-1):
         self.num_cluster = n_clusters
         self.n_init = n_init
         self.random_state = random_state
         self.max_iter = max_iter
-
+        self.mini_rocket = MiniRocket(n_kernels=num_kernels,
+                                      max_dilations_per_kernel=max_dilations_per_kernel,
+                                      n_jobs=n_jobs)
+        self.fit = False
         super().__init__()
 
-    @staticmethod
-    @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True)
-    def __fit_biases(X, dilations, num_features_per_dilation, quantiles):
-
-        num_examples, input_length = X.shape
-
-        # equivalent to:
-        # >>> from itertools import combinations
-        # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32)
-        ###MODIFICATION
-        indices = np.array((
-            1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0,
-            1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4,
-            5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6,
-            5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2,
-            3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7,
-            8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7,
-            1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0,
-            2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3,
-            6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7,
-            0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3,
-            4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2,
-            4, 0, 5, 7, 1, 3, 8, 1, 7, 8
-        ), dtype=np.int32).reshape(84, 3)
-
-        num_kernels = len(indices)
-        num_dilations = len(dilations)
-
-        num_features = num_kernels * np.sum(num_features_per_dilation)
-
-        biases = np.zeros(num_features, dtype=np.float32)
-
-        feature_index_start = 0
-
-        for dilation_index in range(num_dilations):
-
-            dilation = dilations[dilation_index]
-            padding = ((9 - 1) * dilation) // 2
-
-            num_features_this_dilation = num_features_per_dilation[dilation_index]
-
-            for kernel_index in range(num_kernels):
-
-                feature_index_end = feature_index_start + num_features_this_dilation
-
-                _X = X[np.random.randint(num_examples)]
-
-                A = -_X  # A = alpha * X = -X
-                G = _X + _X + _X  # G = gamma * X = 3X
-
-                C_alpha = np.zeros(input_length, dtype=np.float32)
-                C_alpha[:] = A
-
-                C_gamma = np.zeros((9, input_length), dtype=np.float32)
-                C_gamma[9 // 2] = G
-
-                start = dilation
-                end = input_length - padding
-
-                for gamma_index in range(9 // 2):
-                    C_alpha[-end:] = C_alpha[-end:] + A[:end]
-                    C_gamma[gamma_index, -end:] = G[:end]
-
-                    end += dilation
-
-                for gamma_index in range(9 // 2 + 1, 9):
-                    C_alpha[:-start] = C_alpha[:-start] + A[start:]
-                    C_gamma[gamma_index, :-start] = G[start:]
-
-                    start += dilation
-
-                index_0, index_1, index_2 = indices[kernel_index]
-
-                C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2]
-
-                biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[
-                    feature_index_start:feature_index_end])
-
-                feature_index_start = feature_index_end
-
-        return biases
-
-    def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel):
-
-        num_kernels = 84
-
-        num_features_per_kernel = num_features // num_kernels
-        true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel)
-        multiplier = num_features_per_kernel / true_max_dilations_per_kernel
-
-        max_exponent = np.log2((input_length - 1) / (9 - 1))
-        dilations, num_features_per_dilation = \
-            np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32),
-                      return_counts=True)
-        num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32)  # this is a vector
-
-        remainder = num_features_per_kernel - np.sum(num_features_per_dilation)
-        i = 0
-        while remainder > 0:
-            num_features_per_dilation[i] += 1
-            remainder -= 1
-            i = (i + 1) % len(num_features_per_dilation)
-
-        return dilations, num_features_per_dilation
-
-    def __quantiles(self,n):
-        return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32)
-
-    def __fit_rocket(self,X):
-
-        _, input_length = X.shape
-
-
-        dilations, num_features_per_dilation = self.__fit_dilations(input_length,
-                                                                    self.num_features,
-                                                                    self.max_dilations_per_kernel)
-
-        num_features_per_kernel = np.sum(num_features_per_dilation)
-
-        quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel)
-
-        ###MODIFICATION
-        quantiles = np.random.permutation(quantiles)
-
-        biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles)
-
-        return dilations, num_features_per_dilation, biases
-
-    def __transform(self,X, parameters):
-
-        num_examples, input_length = X.shape
-
-        dilations, num_features_per_dilation, biases = parameters
-
-        # equivalent to:
-        # >>> from itertools import combinations
-        # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32)
-        indices = np.array((
-            1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0,
-            1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4,
-            5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6,
-            5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2,
-            3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7,
-            8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7,
-            1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0,
-            2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3,
-            6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7,
-            0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3,
-            4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2,
-            4, 0, 5, 7, 1, 3, 8, 1, 7, 8
-        ), dtype=np.int32).reshape(84, 3)
-
-        num_kernels = len(indices)
-        num_dilations = len(dilations)
-
-        num_features = num_kernels * np.sum(num_features_per_dilation)
-
-        features = np.zeros((num_examples, num_features), dtype=np.float32)
-
-        for example_index in prange(num_examples):
-
-            _X = X[example_index]
-
-            A = -_X  # A = alpha * X = -X
-            G = _X + _X + _X  # G = gamma * X = 3X
-
-            feature_index_start = 0
-
-            for dilation_index in range(num_dilations):
-
-                _padding0 = dilation_index % 2
-
-                dilation = dilations[dilation_index]
-                padding = ((9 - 1) * dilation) // 2
-
-                num_features_this_dilation = num_features_per_dilation[dilation_index]
-
-                C_alpha = np.zeros(input_length, dtype=np.float32)
-                C_alpha[:] = A
-
-                C_gamma = np.zeros((9, input_length), dtype=np.float32)
-                C_gamma[9 // 2] = G
-
-                start = dilation
-                end = input_length - padding
-
-                for gamma_index in range(9 // 2):
-                    C_alpha[-end:] = C_alpha[-end:] + A[:end]
-                    C_gamma[gamma_index, -end:] = G[:end]
-
-                    end += dilation
-
-                for gamma_index in range(9 // 2 + 1, 9):
-                    C_alpha[:-start] = C_alpha[:-start] + A[start:]
-                    C_gamma[gamma_index, :-start] = G[start:]
-
-                    start += dilation
-
-                for kernel_index in range(num_kernels):
-
-                    feature_index_end = feature_index_start + num_features_this_dilation
-
-                    _padding1 = (_padding0 + kernel_index) % 2
-
-                    index_0, index_1, index_2 = indices[kernel_index]
-
-                    C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2]
-
-                    if _padding1 == 0:
-                        for feature_count in range(num_features_this_dilation):
-                            features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean()
-                    else:
-                        for feature_count in range(num_features_this_dilation):
-                            features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean()
-
-
-                    feature_index_start = feature_index_end
-
-        return features
-
-    def _fit(self,X,y=None):
-        parameters = self.__fit_rocket(X=X)
-        transformed_data = self.__transform(X=X, parameters=parameters)
+    def _fit(self, X, y=None):
+        self.mini_rocket.fit(X=X)
+        transformed_data = self.mini_rocket.transform(X=X)
 
         sc = StandardScaler()
         X_std = sc.fit_transform(transformed_data)
@@ -309,23 +93,28 @@ def _fit(self,X,y=None):
                                         n_clusters=self.num_cluster,
                                         n_init=self.n_init,
                                         random_state=self.random_state,
-                                        max_iter=self.max_iter)
+                                        max_iter=self.max_iter, )
         self._r_cluster.fit(transformed_data_pca)
+        self.fit = True
 
     def _predict(self, X, y=None) -> np.ndarray:
+        if not self.fit:
+            raise ValueError("Data is not fitted. Please fit the model before using it.")
+
+        self.mini_rocket.fit(X=X)
+        transformed_data = self.mini_rocket.transform(X=X)
 
-        parameters = self.__fit_rocket(X=X)
-        transformed_data = self.__transform(X=X, parameters=parameters)
         sc = StandardScaler()
         X_std = sc.fit_transform(transformed_data)
 
         pca_optimal = PCA(n_components=self.optimal_dimensions)
         transformed_data_pca = pca_optimal.fit_transform(X_std)
 
         return self._r_cluster.predict(transformed_data_pca)
+
     def _fit_predict(self, X, y=None) -> np.ndarray:
-        parameters = self.__fit_rocket(X=X)
-        transformed_data = self.__transform(X=X, parameters=parameters)
+        self.mini_rocket.fit(X=X)
+        transformed_data = self.mini_rocket.transform(X=X)
 
         sc = StandardScaler()
         X_std = sc.fit_transform(transformed_data)
@@ -342,4 +131,4 @@ def _fit_predict(self, X, y=None) -> np.ndarray:
                                         n_init=self.n_init,
                                         random_state=self.random_state,
                                         max_iter=self.max_iter)
-        return self._r_cluster.fit_predict(transformed_data_pca)
\ No newline at end of file
+        return self._r_cluster.fit_predict(transformed_data_pca)
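
For reference, the pipeline the refactored RCluster runs is: MiniRocket features, then standardisation, PCA, and KMeans. The sketch below is illustrative only and is not part of the commit: the random input array, the `n_components` and `n_clusters` values, and the use of `fit_transform` are assumptions for the example, while the `MiniRocket`, `StandardScaler`, `PCA`, and `KMeans` calls mirror those visible in the diff.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from aeon.transformations.collection.convolution_based import MiniRocket

# Illustrative input: 20 univariate series of length 100.
# aeon collections are shaped (n_cases, n_channels, n_timepoints).
rng = np.random.default_rng(0)
X = rng.standard_normal((20, 1, 100)).astype(np.float32)

# Fixed convolutional kernels, configured as in RCluster.__init__.
mini_rocket = MiniRocket(n_kernels=84, max_dilations_per_kernel=32, n_jobs=-1)
features = mini_rocket.fit_transform(X)

# Standardise the features, then reduce dimensionality; n_components=10
# is an assumed stand-in for RCluster's self.optimal_dimensions.
X_std = StandardScaler().fit_transform(features)
X_pca = PCA(n_components=10).fit_transform(X_std)

# Cluster in the reduced space, mirroring the KMeans call in _fit.
labels = KMeans(n_clusters=3, n_init=10, max_iter=300, random_state=0).fit_predict(X_pca)
print(labels)  # one cluster id per input series

One design point visible in the diff: `_predict` calls `self.mini_rocket.fit(X=X)` on the data being predicted, so prediction-time features come from kernels and biases refitted to that data rather than reusing the ones fitted during training.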
