From 5092822b9e65f34574f03a454896c106c5a61b04 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 23 Nov 2024 04:05:44 +0530 Subject: [PATCH 001/103] create a r_clustering model --- aeon/clustering/_r_cluster.py | 323 ++++++++++++++++++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 aeon/clustering/_r_cluster.py diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py new file mode 100644 index 0000000000..d3fde5ca94 --- /dev/null +++ b/aeon/clustering/_r_cluster.py @@ -0,0 +1,323 @@ +import numpy as np +from numpy.random import RandomState + +from typing import Optional, Union +from sklearn.cluster import KMeans +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler + +from aeon.clustering.base import BaseClusterer + +from numba import njit, prange + + +class RCluster(BaseClusterer): + """Time series R Clustering implementation . + + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + def __init__(self, + num_features=500, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300): + self.num_features = num_features + self.num_kernels = num_kernels + self.max_dilations_per_kernel = max_dilations_per_kernel + self.num_cluster = n_clusters + self.n_init = n_init + self.random_state = random_state + self.max_iter = max_iter + + @staticmethod + @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True) + def __fit_biases(X, dilations, num_features_per_dilation, quantiles): + + num_examples, input_length = X.shape + + # equivalent to: + # >>> from itertools import combinations + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + ###MODIFICATION + indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype=np.int32).reshape(84, 3) + + num_kernels = len(indices) + num_dilations = len(dilations) + + num_features = num_kernels * np.sum(num_features_per_dilation) + + biases = np.zeros(num_features, dtype=np.float32) + + feature_index_start = 0 + + for dilation_index in range(num_dilations): + + dilation = dilations[dilation_index] + padding = ((9 - 1) * dilation) // 2 + + num_features_this_dilation = num_features_per_dilation[dilation_index] + + for kernel_index in range(num_kernels): + + feature_index_end = feature_index_start + num_features_this_dilation + + _X = X[np.random.randint(num_examples)] + + A = -_X # A = alpha * X = -X + G = _X + _X + _X # G = gamma * X = 3X + + C_alpha = np.zeros(input_length, dtype=np.float32) + C_alpha[:] = A + + C_gamma = np.zeros((9, input_length), dtype=np.float32) + C_gamma[9 // 2] = G + + start = dilation + end = input_length - padding + + for gamma_index in range(9 // 2): + C_alpha[-end:] = C_alpha[-end:] + A[:end] + C_gamma[gamma_index, -end:] = G[:end] + + end += dilation + + for gamma_index in range(9 // 2 + 1, 9): + C_alpha[:-start] = C_alpha[:-start] + A[start:] + C_gamma[gamma_index, :-start] = G[start:] + + start += dilation + + index_0, index_1, index_2 = indices[kernel_index] + + C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] + + biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[ + feature_index_start:feature_index_end]) + + feature_index_start = feature_index_end + + return biases + + def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): + + num_kernels = 84 + + num_features_per_kernel = num_features // num_kernels + true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel) + multiplier = num_features_per_kernel / true_max_dilations_per_kernel + + max_exponent = np.log2((input_length - 1) / (9 - 1)) + dilations, num_features_per_dilation = \ + np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32), + return_counts=True) + num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector + + remainder = num_features_per_kernel - np.sum(num_features_per_dilation) + i = 0 + while remainder > 0: + num_features_per_dilation[i] += 1 + remainder -= 1 + i = (i + 1) % len(num_features_per_dilation) + + return dilations, num_features_per_dilation + + def __quantiles(self,n): + return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32) + + def __fit_rocket(self,X): + + _, input_length = X.shape + + + dilations, num_features_per_dilation = self.__fit_dilations(input_length, + self.num_features, + self.max_dilations_per_kernel) + + num_features_per_kernel = np.sum(num_features_per_dilation) + + quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) + + ###MODIFICATION + quantiles = np.random.permutation(quantiles) + + biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) + + return dilations, num_features_per_dilation, biases + + def __transform(self,X, parameters): + + num_examples, input_length = X.shape + + dilations, num_features_per_dilation, biases = parameters + + # equivalent to: + # >>> from itertools import combinations + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype=np.int32).reshape(84, 3) + + num_kernels = len(indices) + num_dilations = len(dilations) + + num_features = num_kernels * np.sum(num_features_per_dilation) + + features = np.zeros((num_examples, num_features), dtype=np.float32) + + for example_index in prange(num_examples): + + _X = X[example_index] + + A = -_X # A = alpha * X = -X + G = _X + _X + _X # G = gamma * X = 3X + + feature_index_start = 0 + + for dilation_index in range(num_dilations): + + _padding0 = dilation_index % 2 + + dilation = dilations[dilation_index] + padding = ((9 - 1) * dilation) // 2 + + num_features_this_dilation = num_features_per_dilation[dilation_index] + + C_alpha = np.zeros(input_length, dtype=np.float32) + C_alpha[:] = A + + C_gamma = np.zeros((9, input_length), dtype=np.float32) + C_gamma[9 // 2] = G + + start = dilation + end = input_length - padding + + for gamma_index in range(9 // 2): + C_alpha[-end:] = C_alpha[-end:] + A[:end] + C_gamma[gamma_index, -end:] = G[:end] + + end += dilation + + for gamma_index in range(9 // 2 + 1, 9): + C_alpha[:-start] = C_alpha[:-start] + A[start:] + C_gamma[gamma_index, :-start] = G[start:] + + start += dilation + + for kernel_index in range(num_kernels): + + feature_index_end = feature_index_start + num_features_this_dilation + + _padding1 = (_padding0 + kernel_index) % 2 + + index_0, index_1, index_2 = indices[kernel_index] + + C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] + + if _padding1 == 0: + for feature_count in range(num_features_this_dilation): + features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean() + else: + for feature_count in range(num_features_this_dilation): + features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean() + + + feature_index_start = feature_index_end + + return features + + def _fit(self,X,y=None): + parameters = self.__fit_rocket(X=X) + transformed_data = self.__transform(X=X, parameters=parameters) + + sc = StandardScaler() + X_std = sc.fit_transform(transformed_data) + + pca = PCA().fit(X_std) + + self.optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) + + pca_optimal = PCA(n_components=self.optimal_dimensions) + transformed_data_pca = pca_optimal.fit_transform(X_std) + + self._r_cluster = KMeans( + n_clusters=self.num_cluster, + n_init=self.n_init, + random_state=self.random_state, + max_iter=self.max_iter) + self._r_cluster.fit(transformed_data_pca) + + def _predict(self, X, y=None) -> np.ndarray: + + parameters = self.__fit_rocket(X=X) + transformed_data = self.__transform(X=X, parameters=parameters) + sc = StandardScaler() + X_std = sc.fit_transform(transformed_data) + + pca_optimal = PCA(n_components=self.optimal_dimensions) + transformed_data_pca = pca_optimal.fit_transform(X_std) + + return self._r_cluster.predict(transformed_data_pca) \ No newline at end of file From af09db0b8e169208860b1ccabdea025ee1480522 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 22 Nov 2024 22:48:52 +0000 Subject: [PATCH 002/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 721 +++++++++++++++++++++++++++++----- 1 file changed, 619 insertions(+), 102 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index d3fde5ca94..fb14fbdd8c 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,66 +1,69 @@ +from typing import Optional, Union + import numpy as np +from numba import njit, prange from numpy.random import RandomState - -from typing import Optional, Union from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer -from numba import njit, prange - class RCluster(BaseClusterer): """Time series R Clustering implementation . - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - def __init__(self, - num_features=500, - num_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state: Optional[Union[int, RandomState]] = None, - max_iter=300): + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + + def __init__( + self, + num_features=500, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300, + ): self.num_features = num_features self.num_kernels = num_kernels self.max_dilations_per_kernel = max_dilations_per_kernel @@ -70,7 +73,12 @@ def __init__(self, self.max_iter = max_iter @staticmethod - @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True) + @njit( + "float32[:](float32[:,:],int32[:],int32[:],float32[:])", + fastmath=True, + parallel=False, + cache=True, + ) def __fit_biases(X, dilations, num_features_per_dilation, quantiles): num_examples, input_length = X.shape @@ -79,20 +87,263 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) ###MODIFICATION - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) + indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) num_kernels = len(indices) num_dilations = len(dilations) @@ -144,26 +395,34 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[ - feature_index_start:feature_index_end]) + biases[feature_index_start:feature_index_end] = np.quantile( + C, quantiles[feature_index_start:feature_index_end] + ) feature_index_start = feature_index_end return biases - def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): + def __fit_dilations(self, input_length, num_features, max_dilations_per_kernel): num_kernels = 84 num_features_per_kernel = num_features // num_kernels - true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel) + true_max_dilations_per_kernel = min( + num_features_per_kernel, max_dilations_per_kernel + ) multiplier = num_features_per_kernel / true_max_dilations_per_kernel max_exponent = np.log2((input_length - 1) / (9 - 1)) - dilations, num_features_per_dilation = \ - np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32), - return_counts=True) - num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector + dilations, num_features_per_dilation = np.unique( + np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype( + np.int32 + ), + return_counts=True, + ) + num_features_per_dilation = (num_features_per_dilation * multiplier).astype( + np.int32 + ) # this is a vector remainder = num_features_per_kernel - np.sum(num_features_per_dilation) i = 0 @@ -174,17 +433,19 @@ def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): return dilations, num_features_per_dilation - def __quantiles(self,n): - return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32) + def __quantiles(self, n): + return np.array( + [(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], + dtype=np.float32, + ) - def __fit_rocket(self,X): + def __fit_rocket(self, X): _, input_length = X.shape - - dilations, num_features_per_dilation = self.__fit_dilations(input_length, - self.num_features, - self.max_dilations_per_kernel) + dilations, num_features_per_dilation = self.__fit_dilations( + input_length, self.num_features, self.max_dilations_per_kernel + ) num_features_per_kernel = np.sum(num_features_per_dilation) @@ -197,7 +458,7 @@ def __fit_rocket(self,X): return dilations, num_features_per_dilation, biases - def __transform(self,X, parameters): + def __transform(self, X, parameters): num_examples, input_length = X.shape @@ -206,20 +467,263 @@ def __transform(self,X, parameters): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) + indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) num_kernels = len(indices) num_dilations = len(dilations) @@ -279,17 +783,29 @@ def __transform(self,X, parameters): if _padding1 == 0: for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean() + features[ + example_index, feature_index_start + feature_count + ] = ( + ( + C > biases[feature_index_start + feature_count] + ).astype(float) + ).mean() else: for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean() - + features[ + example_index, feature_index_start + feature_count + ] = ( + ( + C[padding:-padding] + > biases[feature_index_start + feature_count] + ).astype(float) + ).mean() feature_index_start = feature_index_end return features - def _fit(self,X,y=None): + def _fit(self, X, y=None): parameters = self.__fit_rocket(X=X) transformed_data = self.__transform(X=X, parameters=parameters) @@ -307,7 +823,8 @@ def _fit(self,X,y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) + max_iter=self.max_iter, + ) self._r_cluster.fit(transformed_data_pca) def _predict(self, X, y=None) -> np.ndarray: @@ -320,4 +837,4 @@ def _predict(self, X, y=None) -> np.ndarray: pca_optimal = PCA(n_components=self.optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) - return self._r_cluster.predict(transformed_data_pca) \ No newline at end of file + return self._r_cluster.predict(transformed_data_pca) From 4f661a2ba6ce22c9ddff7578fb53a858c5d69674 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 23 Nov 2024 14:24:37 +0530 Subject: [PATCH 003/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index fb14fbdd8c..965d5e2002 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -19,12 +19,14 @@ class RCluster(BaseClusterer): ---------- num_kernels : int , default = 84 The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + These kernels are fixed and pre-defined (not random) and are optimized + for computational speed and feature diversity max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, + Dilations control the spacing of the kernel's receptive field + over the time series, capturing patterns at varying scales num_features : int , default = 500 @@ -32,7 +34,7 @@ class RCluster(BaseClusterer): num_cluster : int , default = 8 The number of clusters used - + n_init : int , default = 10 The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds to avoid poor local optima @@ -85,8 +87,9 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - ###MODIFICATION + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], + #dtype = np.int32) + #MODIFICATION indices = np.array( ( 1, @@ -466,7 +469,8 @@ def __transform(self, X, parameters): # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] + #, dtype = np.int32) indices = np.array( ( 1, From 4b9606e71033f64a04d0a2ae31d9a5878890c550 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 23 Nov 2024 14:25:36 +0530 Subject: [PATCH 004/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 965d5e2002..17199484db 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -36,7 +36,8 @@ class RCluster(BaseClusterer): The number of clusters used n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + The number of times the clustering algorithm (e.g., KMeans) will run with + different centroid seeds to avoid poor local optima max_iter: int, default=300 From fd3d5c710726251fae09ce0551085bca05587f88 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 23 Nov 2024 08:56:55 +0000 Subject: [PATCH 005/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 17199484db..dffcb34e39 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -19,13 +19,13 @@ class RCluster(BaseClusterer): ---------- num_kernels : int , default = 84 The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized + These kernels are fixed and pre-defined (not random) and are optimized for computational speed and feature diversity max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field + Dilations control the spacing of the kernel's receptive field over the time series, capturing patterns at varying scales @@ -34,9 +34,9 @@ class RCluster(BaseClusterer): num_cluster : int , default = 8 The number of clusters used - + n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with + The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds to avoid poor local optima @@ -88,9 +88,9 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], - #dtype = np.int32) - #MODIFICATION + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], + # dtype = np.int32) + # MODIFICATION indices = np.array( ( 1, @@ -471,7 +471,7 @@ def __transform(self, X, parameters): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] - #, dtype = np.int32) + # , dtype = np.int32) indices = np.array( ( 1, From 47ff1371c92b59b7eee36e70b3a5103fecd356ee Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 23 Nov 2024 14:30:23 +0530 Subject: [PATCH 006/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index dffcb34e39..0b11c95b57 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -455,7 +455,7 @@ def __fit_rocket(self, X): quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - ###MODIFICATION + #MODIFICATION quantiles = np.random.permutation(quantiles) biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) From 6eb536fd651a25df4947580e1a78110452fc8bb3 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 23 Nov 2024 09:00:54 +0000 Subject: [PATCH 007/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 0b11c95b57..9e21e758a8 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -455,7 +455,7 @@ def __fit_rocket(self, X): quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - #MODIFICATION + # MODIFICATION quantiles = np.random.permutation(quantiles) biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) From bf38de35e442adb13340e5e936bfdbac16250f77 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Wed, 27 Nov 2024 04:24:13 +0530 Subject: [PATCH 008/103] create a r_clustering model --- aeon/clustering/__init__.py | 2 + aeon/clustering/_r_cluster.py | 754 +++++------------------------- docs/api_reference/clustering.rst | 1 + 3 files changed, 130 insertions(+), 627 deletions(-) diff --git a/aeon/clustering/__init__.py b/aeon/clustering/__init__.py index 2eec5142cf..8c93abf682 100644 --- a/aeon/clustering/__init__.py +++ b/aeon/clustering/__init__.py @@ -11,6 +11,7 @@ "ElasticSOM", "KSpectralCentroid", "DummyClusterer", + "RCluster", ] from aeon.clustering._clara import TimeSeriesCLARA @@ -21,5 +22,6 @@ from aeon.clustering._k_sc import KSpectralCentroid from aeon.clustering._k_shape import TimeSeriesKShape from aeon.clustering._kernel_k_means import TimeSeriesKernelKMeans +from aeon.clustering._r_cluster import RCluster from aeon.clustering.base import BaseClusterer from aeon.clustering.dummy import DummyClusterer diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 9e21e758a8..a33bca38bd 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,72 +1,66 @@ -from typing import Optional, Union - import numpy as np -from numba import njit, prange from numpy.random import RandomState + +from typing import Optional, Union from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer +from numba import njit, prange + class RCluster(BaseClusterer): """Time series R Clustering implementation . - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized - for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field - over the time series, - capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with - different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - - def __init__( - self, - num_features=500, - num_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state: Optional[Union[int, RandomState]] = None, - max_iter=300, - ): + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + def __init__(self, + num_features=500, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300): self.num_features = num_features self.num_kernels = num_kernels self.max_dilations_per_kernel = max_dilations_per_kernel @@ -75,279 +69,32 @@ def __init__( self.random_state = random_state self.max_iter = max_iter + super().__init__() + @staticmethod - @njit( - "float32[:](float32[:,:],int32[:],int32[:],float32[:])", - fastmath=True, - parallel=False, - cache=True, - ) + @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True) def __fit_biases(X, dilations, num_features_per_dilation, quantiles): num_examples, input_length = X.shape # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], - # dtype = np.int32) - # MODIFICATION - indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + ###MODIFICATION + indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype=np.int32).reshape(84, 3) num_kernels = len(indices) num_dilations = len(dilations) @@ -399,34 +146,26 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - biases[feature_index_start:feature_index_end] = np.quantile( - C, quantiles[feature_index_start:feature_index_end] - ) + biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[ + feature_index_start:feature_index_end]) feature_index_start = feature_index_end return biases - def __fit_dilations(self, input_length, num_features, max_dilations_per_kernel): + def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): num_kernels = 84 num_features_per_kernel = num_features // num_kernels - true_max_dilations_per_kernel = min( - num_features_per_kernel, max_dilations_per_kernel - ) + true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel) multiplier = num_features_per_kernel / true_max_dilations_per_kernel max_exponent = np.log2((input_length - 1) / (9 - 1)) - dilations, num_features_per_dilation = np.unique( - np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype( - np.int32 - ), - return_counts=True, - ) - num_features_per_dilation = (num_features_per_dilation * multiplier).astype( - np.int32 - ) # this is a vector + dilations, num_features_per_dilation = \ + np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32), + return_counts=True) + num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector remainder = num_features_per_kernel - np.sum(num_features_per_dilation) i = 0 @@ -437,32 +176,30 @@ def __fit_dilations(self, input_length, num_features, max_dilations_per_kernel): return dilations, num_features_per_dilation - def __quantiles(self, n): - return np.array( - [(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], - dtype=np.float32, - ) + def __quantiles(self,n): + return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32) - def __fit_rocket(self, X): + def __fit_rocket(self,X): _, input_length = X.shape - dilations, num_features_per_dilation = self.__fit_dilations( - input_length, self.num_features, self.max_dilations_per_kernel - ) + + dilations, num_features_per_dilation = self.__fit_dilations(input_length, + self.num_features, + self.max_dilations_per_kernel) num_features_per_kernel = np.sum(num_features_per_dilation) quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - # MODIFICATION + ###MODIFICATION quantiles = np.random.permutation(quantiles) biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) return dilations, num_features_per_dilation, biases - def __transform(self, X, parameters): + def __transform(self,X, parameters): num_examples, input_length = X.shape @@ -470,265 +207,21 @@ def __transform(self, X, parameters): # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] - # , dtype = np.int32) - indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype=np.int32).reshape(84, 3) num_kernels = len(indices) num_dilations = len(dilations) @@ -788,29 +281,17 @@ def __transform(self, X, parameters): if _padding1 == 0: for feature_count in range(num_features_this_dilation): - features[ - example_index, feature_index_start + feature_count - ] = ( - ( - C > biases[feature_index_start + feature_count] - ).astype(float) - ).mean() + features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean() else: for feature_count in range(num_features_this_dilation): - features[ - example_index, feature_index_start + feature_count - ] = ( - ( - C[padding:-padding] - > biases[feature_index_start + feature_count] - ).astype(float) - ).mean() + features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean() + feature_index_start = feature_index_end return features - def _fit(self, X, y=None): + def _fit(self,X,y=None): parameters = self.__fit_rocket(X=X) transformed_data = self.__transform(X=X, parameters=parameters) @@ -828,8 +309,7 @@ def _fit(self, X, y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter, - ) + max_iter=self.max_iter) self._r_cluster.fit(transformed_data_pca) def _predict(self, X, y=None) -> np.ndarray: @@ -843,3 +323,23 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data_pca = pca_optimal.fit_transform(X_std) return self._r_cluster.predict(transformed_data_pca) + def _fit_predict(self, X, y=None) -> np.ndarray: + parameters = self.__fit_rocket(X=X) + transformed_data = self.__transform(X=X, parameters=parameters) + + sc = StandardScaler() + X_std = sc.fit_transform(transformed_data) + + pca = PCA().fit(X_std) + + optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) + + pca_optimal = PCA(n_components=optimal_dimensions) + transformed_data_pca = pca_optimal.fit_transform(X_std) + + self._r_cluster = KMeans( + n_clusters=self.num_cluster, + n_init=self.n_init, + random_state=self.random_state, + max_iter=self.max_iter) + return self._r_cluster.fit_predict(transformed_data_pca) diff --git a/docs/api_reference/clustering.rst b/docs/api_reference/clustering.rst index c519406495..400c0b1583 100644 --- a/docs/api_reference/clustering.rst +++ b/docs/api_reference/clustering.rst @@ -40,6 +40,7 @@ Clustering Algorithms TimeSeriesCLARANS ElasticSOM KSpectralCentroid + RCluster Base ---- From 8b2130886049e7a5cbc9e11ac5e82b77c3723e52 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 26 Nov 2024 22:55:31 +0000 Subject: [PATCH 009/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 723 +++++++++++++++++++++++++++++----- 1 file changed, 621 insertions(+), 102 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a33bca38bd..2379d276d0 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,66 +1,69 @@ +from typing import Optional, Union + import numpy as np +from numba import njit, prange from numpy.random import RandomState - -from typing import Optional, Union from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer -from numba import njit, prange - class RCluster(BaseClusterer): """Time series R Clustering implementation . - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - def __init__(self, - num_features=500, - num_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state: Optional[Union[int, RandomState]] = None, - max_iter=300): + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + + def __init__( + self, + num_features=500, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300, + ): self.num_features = num_features self.num_kernels = num_kernels self.max_dilations_per_kernel = max_dilations_per_kernel @@ -72,7 +75,12 @@ def __init__(self, super().__init__() @staticmethod - @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True) + @njit( + "float32[:](float32[:,:],int32[:],int32[:],float32[:])", + fastmath=True, + parallel=False, + cache=True, + ) def __fit_biases(X, dilations, num_features_per_dilation, quantiles): num_examples, input_length = X.shape @@ -81,20 +89,263 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) ###MODIFICATION - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) + indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) num_kernels = len(indices) num_dilations = len(dilations) @@ -146,26 +397,34 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[ - feature_index_start:feature_index_end]) + biases[feature_index_start:feature_index_end] = np.quantile( + C, quantiles[feature_index_start:feature_index_end] + ) feature_index_start = feature_index_end return biases - def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): + def __fit_dilations(self, input_length, num_features, max_dilations_per_kernel): num_kernels = 84 num_features_per_kernel = num_features // num_kernels - true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel) + true_max_dilations_per_kernel = min( + num_features_per_kernel, max_dilations_per_kernel + ) multiplier = num_features_per_kernel / true_max_dilations_per_kernel max_exponent = np.log2((input_length - 1) / (9 - 1)) - dilations, num_features_per_dilation = \ - np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32), - return_counts=True) - num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector + dilations, num_features_per_dilation = np.unique( + np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype( + np.int32 + ), + return_counts=True, + ) + num_features_per_dilation = (num_features_per_dilation * multiplier).astype( + np.int32 + ) # this is a vector remainder = num_features_per_kernel - np.sum(num_features_per_dilation) i = 0 @@ -176,17 +435,19 @@ def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): return dilations, num_features_per_dilation - def __quantiles(self,n): - return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32) + def __quantiles(self, n): + return np.array( + [(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], + dtype=np.float32, + ) - def __fit_rocket(self,X): + def __fit_rocket(self, X): _, input_length = X.shape - - dilations, num_features_per_dilation = self.__fit_dilations(input_length, - self.num_features, - self.max_dilations_per_kernel) + dilations, num_features_per_dilation = self.__fit_dilations( + input_length, self.num_features, self.max_dilations_per_kernel + ) num_features_per_kernel = np.sum(num_features_per_dilation) @@ -199,7 +460,7 @@ def __fit_rocket(self,X): return dilations, num_features_per_dilation, biases - def __transform(self,X, parameters): + def __transform(self, X, parameters): num_examples, input_length = X.shape @@ -208,20 +469,263 @@ def __transform(self,X, parameters): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) + indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) num_kernels = len(indices) num_dilations = len(dilations) @@ -281,17 +785,29 @@ def __transform(self,X, parameters): if _padding1 == 0: for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean() + features[ + example_index, feature_index_start + feature_count + ] = ( + ( + C > biases[feature_index_start + feature_count] + ).astype(float) + ).mean() else: for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean() - + features[ + example_index, feature_index_start + feature_count + ] = ( + ( + C[padding:-padding] + > biases[feature_index_start + feature_count] + ).astype(float) + ).mean() feature_index_start = feature_index_end return features - def _fit(self,X,y=None): + def _fit(self, X, y=None): parameters = self.__fit_rocket(X=X) transformed_data = self.__transform(X=X, parameters=parameters) @@ -309,7 +825,8 @@ def _fit(self,X,y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) + max_iter=self.max_iter, + ) self._r_cluster.fit(transformed_data_pca) def _predict(self, X, y=None) -> np.ndarray: @@ -323,6 +840,7 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data_pca = pca_optimal.fit_transform(X_std) return self._r_cluster.predict(transformed_data_pca) + def _fit_predict(self, X, y=None) -> np.ndarray: parameters = self.__fit_rocket(X=X) transformed_data = self.__transform(X=X, parameters=parameters) @@ -341,5 +859,6 @@ def _fit_predict(self, X, y=None) -> np.ndarray: n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) + max_iter=self.max_iter, + ) return self._r_cluster.fit_predict(transformed_data_pca) From 54bf41774ced202abde57df6637af392713c2631 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 04:29:57 +0530 Subject: [PATCH 010/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 2379d276d0..c9564f9e40 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -19,12 +19,14 @@ class RCluster(BaseClusterer): ---------- num_kernels : int , default = 84 The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + These kernels are fixed and pre-defined (not random) and are + optimized for computational speed and feature diversity max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, + Dilations control the spacing of the kernel's receptive + field over the time series, capturing patterns at varying scales num_features : int , default = 500 @@ -34,7 +36,8 @@ class RCluster(BaseClusterer): The number of clusters used n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + The number of times the clustering algorithm (e.g., KMeans) + will run with different centroid seeds to avoid poor local optima max_iter: int, default=300 @@ -88,7 +91,7 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - ###MODIFICATION + #MODIFICATION indices = np.array( ( 1, @@ -453,7 +456,7 @@ def __fit_rocket(self, X): quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - ###MODIFICATION + #MODIFICATION quantiles = np.random.permutation(quantiles) biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) @@ -468,7 +471,8 @@ def __transform(self, X, parameters): # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] + #, dtype = np.int32) indices = np.array( ( 1, From 966346a60da53d93e252616f41419bc19ccf607b Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 26 Nov 2024 23:00:29 +0000 Subject: [PATCH 011/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index c9564f9e40..089b08dca4 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -19,13 +19,13 @@ class RCluster(BaseClusterer): ---------- num_kernels : int , default = 84 The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are + These kernels are fixed and pre-defined (not random) and are optimized for computational speed and feature diversity max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive + Dilations control the spacing of the kernel's receptive field over the time series, capturing patterns at varying scales @@ -36,7 +36,7 @@ class RCluster(BaseClusterer): The number of clusters used n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) + The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds to avoid poor local optima @@ -91,7 +91,7 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - #MODIFICATION + # MODIFICATION indices = np.array( ( 1, @@ -456,7 +456,7 @@ def __fit_rocket(self, X): quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - #MODIFICATION + # MODIFICATION quantiles = np.random.permutation(quantiles) biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) @@ -472,7 +472,7 @@ def __transform(self, X, parameters): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] - #, dtype = np.int32) + # , dtype = np.int32) indices = np.array( ( 1, From 46922326f918e99a7c3a0b51c964dd23b57da0bd Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 04:31:41 +0530 Subject: [PATCH 012/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 089b08dca4..c7846c5c6a 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -90,7 +90,8 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # equivalent to: # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) + # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] + #, dtype = np.int32) # MODIFICATION indices = np.array( ( From ac28d6009f13f61cc5c8aed4248847db8ee9d5ac Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 26 Nov 2024 23:02:08 +0000 Subject: [PATCH 013/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index c7846c5c6a..d425120d39 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -91,7 +91,7 @@ def __fit_biases(X, dilations, num_features_per_dilation, quantiles): # equivalent to: # >>> from itertools import combinations # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)] - #, dtype = np.int32) + # , dtype = np.int32) # MODIFICATION indices = np.array( ( From d9db937f28185bcb5c79f67fde89f1d05b47c767 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Wed, 27 Nov 2024 15:08:53 +0530 Subject: [PATCH 014/103] used aeon mini rocket --- aeon/clustering/_r_cluster.py | 258 +++------------------------------- 1 file changed, 23 insertions(+), 235 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a33bca38bd..7a97564918 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -5,11 +5,9 @@ from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler - +from aeon.transformations.collection.convolution_based import MiniRocket from aeon.clustering.base import BaseClusterer -from numba import njit, prange - class RCluster(BaseClusterer): """Time series R Clustering implementation . @@ -20,7 +18,8 @@ class RCluster(BaseClusterer): ---------- num_kernels : int , default = 84 The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + These kernels are fixed and pre-defined (not random) and are optimized for + computational speed and feature diversity max_dilations_per_kernel : int , default = 32 @@ -35,12 +34,16 @@ class RCluster(BaseClusterer): The number of clusters used n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + The number of times the clustering algorithm (e.g., KMeans) will run with + different centroid seeds to avoid poor local optima max_iter: int, default=300 Maximum number of iterations of the k-means algorithm for a single run. + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` means using all + processors. random_state: int or np.random.RandomState instance or None, default=None Determines random number generation for centroid initialization. Notes @@ -60,240 +63,21 @@ def __init__(self, n_clusters=8, n_init=10, random_state: Optional[Union[int, RandomState]] = None, - max_iter=300): + max_iter=300, + n_jobs=-1): self.num_features = num_features - self.num_kernels = num_kernels - self.max_dilations_per_kernel = max_dilations_per_kernel self.num_cluster = n_clusters self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - + self.mini_rocket = MiniRocket(n_kernels= num_kernels,max_dilations_per_kernel=max_dilations_per_kernel,n_jobs=n_jobs) + self.fit = False super().__init__() - @staticmethod - @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True) - def __fit_biases(X, dilations, num_features_per_dilation, quantiles): - - num_examples, input_length = X.shape - - # equivalent to: - # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - ###MODIFICATION - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) - - num_kernels = len(indices) - num_dilations = len(dilations) - - num_features = num_kernels * np.sum(num_features_per_dilation) - - biases = np.zeros(num_features, dtype=np.float32) - - feature_index_start = 0 - - for dilation_index in range(num_dilations): - - dilation = dilations[dilation_index] - padding = ((9 - 1) * dilation) // 2 - - num_features_this_dilation = num_features_per_dilation[dilation_index] - - for kernel_index in range(num_kernels): - - feature_index_end = feature_index_start + num_features_this_dilation - - _X = X[np.random.randint(num_examples)] - - A = -_X # A = alpha * X = -X - G = _X + _X + _X # G = gamma * X = 3X - - C_alpha = np.zeros(input_length, dtype=np.float32) - C_alpha[:] = A - - C_gamma = np.zeros((9, input_length), dtype=np.float32) - C_gamma[9 // 2] = G - - start = dilation - end = input_length - padding - - for gamma_index in range(9 // 2): - C_alpha[-end:] = C_alpha[-end:] + A[:end] - C_gamma[gamma_index, -end:] = G[:end] - - end += dilation - - for gamma_index in range(9 // 2 + 1, 9): - C_alpha[:-start] = C_alpha[:-start] + A[start:] - C_gamma[gamma_index, :-start] = G[start:] - - start += dilation - - index_0, index_1, index_2 = indices[kernel_index] - - C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - - biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[ - feature_index_start:feature_index_end]) - - feature_index_start = feature_index_end - - return biases - - def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): - - num_kernels = 84 - - num_features_per_kernel = num_features // num_kernels - true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel) - multiplier = num_features_per_kernel / true_max_dilations_per_kernel - - max_exponent = np.log2((input_length - 1) / (9 - 1)) - dilations, num_features_per_dilation = \ - np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32), - return_counts=True) - num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector - - remainder = num_features_per_kernel - np.sum(num_features_per_dilation) - i = 0 - while remainder > 0: - num_features_per_dilation[i] += 1 - remainder -= 1 - i = (i + 1) % len(num_features_per_dilation) - - return dilations, num_features_per_dilation - - def __quantiles(self,n): - return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32) - - def __fit_rocket(self,X): - - _, input_length = X.shape - - - dilations, num_features_per_dilation = self.__fit_dilations(input_length, - self.num_features, - self.max_dilations_per_kernel) - - num_features_per_kernel = np.sum(num_features_per_dilation) - - quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - - ###MODIFICATION - quantiles = np.random.permutation(quantiles) - - biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) - - return dilations, num_features_per_dilation, biases - - def __transform(self,X, parameters): - - num_examples, input_length = X.shape - - dilations, num_features_per_dilation, biases = parameters - - # equivalent to: - # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) - - num_kernels = len(indices) - num_dilations = len(dilations) - - num_features = num_kernels * np.sum(num_features_per_dilation) - - features = np.zeros((num_examples, num_features), dtype=np.float32) - - for example_index in prange(num_examples): - - _X = X[example_index] - - A = -_X # A = alpha * X = -X - G = _X + _X + _X # G = gamma * X = 3X - - feature_index_start = 0 - - for dilation_index in range(num_dilations): - - _padding0 = dilation_index % 2 - - dilation = dilations[dilation_index] - padding = ((9 - 1) * dilation) // 2 - - num_features_this_dilation = num_features_per_dilation[dilation_index] - - C_alpha = np.zeros(input_length, dtype=np.float32) - C_alpha[:] = A - - C_gamma = np.zeros((9, input_length), dtype=np.float32) - C_gamma[9 // 2] = G - - start = dilation - end = input_length - padding - - for gamma_index in range(9 // 2): - C_alpha[-end:] = C_alpha[-end:] + A[:end] - C_gamma[gamma_index, -end:] = G[:end] - - end += dilation - - for gamma_index in range(9 // 2 + 1, 9): - C_alpha[:-start] = C_alpha[:-start] + A[start:] - C_gamma[gamma_index, :-start] = G[start:] - - start += dilation - - for kernel_index in range(num_kernels): - - feature_index_end = feature_index_start + num_features_this_dilation - - _padding1 = (_padding0 + kernel_index) % 2 - - index_0, index_1, index_2 = indices[kernel_index] - - C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - - if _padding1 == 0: - for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean() - else: - for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean() - - - feature_index_start = feature_index_end - - return features def _fit(self,X,y=None): - parameters = self.__fit_rocket(X=X) - transformed_data = self.__transform(X=X, parameters=parameters) + self.mini_rocket.fit(X=X) + transformed_data = self.mini_rocket.transform(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -309,13 +93,17 @@ def _fit(self,X,y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) + max_iter=self.max_iter,) self._r_cluster.fit(transformed_data_pca) + self.fit = True def _predict(self, X, y=None) -> np.ndarray: + if not self.fit: + raise ValueError("Data is not fitted. Please fit the model before using it.") + + self.mini_rocket.fit(X=X) + transformed_data = self.mini_rocket.transform(X=X) - parameters = self.__fit_rocket(X=X) - transformed_data = self.__transform(X=X, parameters=parameters) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -324,8 +112,8 @@ def _predict(self, X, y=None) -> np.ndarray: return self._r_cluster.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - parameters = self.__fit_rocket(X=X) - transformed_data = self.__transform(X=X, parameters=parameters) + self.mini_rocket.fit(X=X) + transformed_data = self.mini_rocket.transform(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) From 36a8e274c5a01fc1d51ef6344f1f87d324e27e0e Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Wed, 27 Nov 2024 15:15:53 +0530 Subject: [PATCH 015/103] used aeon mini rocket --- aeon/clustering/_r_cluster.py | 271 ++++------------------------------ 1 file changed, 30 insertions(+), 241 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a33bca38bd..50ece8a9bf 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -5,12 +5,9 @@ from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler - +from aeon.transformations.collection.convolution_based import MiniRocket from aeon.clustering.base import BaseClusterer -from numba import njit, prange - - class RCluster(BaseClusterer): """Time series R Clustering implementation . @@ -20,7 +17,8 @@ class RCluster(BaseClusterer): ---------- num_kernels : int , default = 84 The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are optimized for computational speed and + These kernels are fixed and pre-defined (not random) and are + optimized for computational speed and feature diversity max_dilations_per_kernel : int , default = 32 @@ -35,7 +33,8 @@ class RCluster(BaseClusterer): The number of clusters used n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will run with different centroid seeds + The number of times the clustering algorithm (e.g., KMeans) will + run with different centroid seeds to avoid poor local optima max_iter: int, default=300 @@ -43,6 +42,10 @@ class RCluster(BaseClusterer): run. random_state: int or np.random.RandomState instance or None, default=None Determines random number generation for centroid initialization. + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` + means using all + processors. Notes ----- Adapted from the implementation from source code @@ -53,247 +56,28 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ + def __init__(self, - num_features=500, num_kernels=84, max_dilations_per_kernel=32, n_clusters=8, n_init=10, random_state: Optional[Union[int, RandomState]] = None, - max_iter=300): - self.num_features = num_features - self.num_kernels = num_kernels - self.max_dilations_per_kernel = max_dilations_per_kernel + max_iter=300, + n_jobs=-1): self.num_cluster = n_clusters self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - + self.mini_rocket = MiniRocket(n_kernels=num_kernels, + max_dilations_per_kernel=max_dilations_per_kernel, + n_jobs=n_jobs) + self.fit = False super().__init__() - @staticmethod - @njit("float32[:](float32[:,:],int32[:],int32[:],float32[:])", fastmath=True, parallel=False, cache=True) - def __fit_biases(X, dilations, num_features_per_dilation, quantiles): - - num_examples, input_length = X.shape - - # equivalent to: - # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - ###MODIFICATION - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) - - num_kernels = len(indices) - num_dilations = len(dilations) - - num_features = num_kernels * np.sum(num_features_per_dilation) - - biases = np.zeros(num_features, dtype=np.float32) - - feature_index_start = 0 - - for dilation_index in range(num_dilations): - - dilation = dilations[dilation_index] - padding = ((9 - 1) * dilation) // 2 - - num_features_this_dilation = num_features_per_dilation[dilation_index] - - for kernel_index in range(num_kernels): - - feature_index_end = feature_index_start + num_features_this_dilation - - _X = X[np.random.randint(num_examples)] - - A = -_X # A = alpha * X = -X - G = _X + _X + _X # G = gamma * X = 3X - - C_alpha = np.zeros(input_length, dtype=np.float32) - C_alpha[:] = A - - C_gamma = np.zeros((9, input_length), dtype=np.float32) - C_gamma[9 // 2] = G - - start = dilation - end = input_length - padding - - for gamma_index in range(9 // 2): - C_alpha[-end:] = C_alpha[-end:] + A[:end] - C_gamma[gamma_index, -end:] = G[:end] - - end += dilation - - for gamma_index in range(9 // 2 + 1, 9): - C_alpha[:-start] = C_alpha[:-start] + A[start:] - C_gamma[gamma_index, :-start] = G[start:] - - start += dilation - - index_0, index_1, index_2 = indices[kernel_index] - - C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - - biases[feature_index_start:feature_index_end] = np.quantile(C, quantiles[ - feature_index_start:feature_index_end]) - - feature_index_start = feature_index_end - - return biases - - def __fit_dilations(self,input_length, num_features, max_dilations_per_kernel): - - num_kernels = 84 - - num_features_per_kernel = num_features // num_kernels - true_max_dilations_per_kernel = min(num_features_per_kernel, max_dilations_per_kernel) - multiplier = num_features_per_kernel / true_max_dilations_per_kernel - - max_exponent = np.log2((input_length - 1) / (9 - 1)) - dilations, num_features_per_dilation = \ - np.unique(np.logspace(0, max_exponent, true_max_dilations_per_kernel, base=2).astype(np.int32), - return_counts=True) - num_features_per_dilation = (num_features_per_dilation * multiplier).astype(np.int32) # this is a vector - - remainder = num_features_per_kernel - np.sum(num_features_per_dilation) - i = 0 - while remainder > 0: - num_features_per_dilation[i] += 1 - remainder -= 1 - i = (i + 1) % len(num_features_per_dilation) - - return dilations, num_features_per_dilation - - def __quantiles(self,n): - return np.array([(_ * ((np.sqrt(5) + 1) / 2)) % 1 for _ in range(1, n + 1)], dtype=np.float32) - - def __fit_rocket(self,X): - - _, input_length = X.shape - - - dilations, num_features_per_dilation = self.__fit_dilations(input_length, - self.num_features, - self.max_dilations_per_kernel) - - num_features_per_kernel = np.sum(num_features_per_dilation) - - quantiles = self.__quantiles(self.num_kernels * num_features_per_kernel) - - ###MODIFICATION - quantiles = np.random.permutation(quantiles) - - biases = self.__fit_biases(X, dilations, num_features_per_dilation, quantiles) - - return dilations, num_features_per_dilation, biases - - def __transform(self,X, parameters): - - num_examples, input_length = X.shape - - dilations, num_features_per_dilation, biases = parameters - - # equivalent to: - # >>> from itertools import combinations - # >>> indices = np.array([_ for _ in combinations(np.arange(9), 3)], dtype = np.int32) - indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype=np.int32).reshape(84, 3) - - num_kernels = len(indices) - num_dilations = len(dilations) - - num_features = num_kernels * np.sum(num_features_per_dilation) - - features = np.zeros((num_examples, num_features), dtype=np.float32) - - for example_index in prange(num_examples): - - _X = X[example_index] - - A = -_X # A = alpha * X = -X - G = _X + _X + _X # G = gamma * X = 3X - - feature_index_start = 0 - - for dilation_index in range(num_dilations): - - _padding0 = dilation_index % 2 - - dilation = dilations[dilation_index] - padding = ((9 - 1) * dilation) // 2 - - num_features_this_dilation = num_features_per_dilation[dilation_index] - - C_alpha = np.zeros(input_length, dtype=np.float32) - C_alpha[:] = A - - C_gamma = np.zeros((9, input_length), dtype=np.float32) - C_gamma[9 // 2] = G - - start = dilation - end = input_length - padding - - for gamma_index in range(9 // 2): - C_alpha[-end:] = C_alpha[-end:] + A[:end] - C_gamma[gamma_index, -end:] = G[:end] - - end += dilation - - for gamma_index in range(9 // 2 + 1, 9): - C_alpha[:-start] = C_alpha[:-start] + A[start:] - C_gamma[gamma_index, :-start] = G[start:] - - start += dilation - - for kernel_index in range(num_kernels): - - feature_index_end = feature_index_start + num_features_this_dilation - - _padding1 = (_padding0 + kernel_index) % 2 - - index_0, index_1, index_2 = indices[kernel_index] - - C = C_alpha + C_gamma[index_0] + C_gamma[index_1] + C_gamma[index_2] - - if _padding1 == 0: - for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] = ((C > biases[feature_index_start + feature_count]).astype(float)).mean() - else: - for feature_count in range(num_features_this_dilation): - features[example_index, feature_index_start + feature_count] =((C[padding:-padding] > biases[feature_index_start + feature_count]).astype(float)).mean() - - - feature_index_start = feature_index_end - - return features - - def _fit(self,X,y=None): - parameters = self.__fit_rocket(X=X) - transformed_data = self.__transform(X=X, parameters=parameters) + def _fit(self, X, y=None): + self.mini_rocket.fit(X=X) + transformed_data = self.mini_rocket.transform(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -309,13 +93,17 @@ def _fit(self,X,y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) + max_iter=self.max_iter, ) self._r_cluster.fit(transformed_data_pca) + self.fit = True def _predict(self, X, y=None) -> np.ndarray: + if not self.fit: + raise ValueError("Data is not fitted. Please fit the model before using it.") + + self.mini_rocket.fit(X=X) + transformed_data = self.mini_rocket.transform(X=X) - parameters = self.__fit_rocket(X=X) - transformed_data = self.__transform(X=X, parameters=parameters) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -323,9 +111,10 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data_pca = pca_optimal.fit_transform(X_std) return self._r_cluster.predict(transformed_data_pca) + def _fit_predict(self, X, y=None) -> np.ndarray: - parameters = self.__fit_rocket(X=X) - transformed_data = self.__transform(X=X, parameters=parameters) + self.mini_rocket.fit(X=X) + transformed_data = self.mini_rocket.transform(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -342,4 +131,4 @@ def _fit_predict(self, X, y=None) -> np.ndarray: n_init=self.n_init, random_state=self.random_state, max_iter=self.max_iter) - return self._r_cluster.fit_predict(transformed_data_pca) + return self._r_cluster.fit_predict(transformed_data_pca) \ No newline at end of file From 72c658b0aca8c75e1c4661ac2b428da64e1754b9 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Wed, 27 Nov 2024 09:46:42 +0000 Subject: [PATCH 016/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 139 ++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 64 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 50ece8a9bf..d650486801 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,77 +1,84 @@ +from typing import Optional, Union + import numpy as np from numpy.random import RandomState - -from typing import Optional, Union from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler -from aeon.transformations.collection.convolution_based import MiniRocket + from aeon.clustering.base import BaseClusterer +from aeon.transformations.collection.convolution_based import MiniRocket + class RCluster(BaseClusterer): """Time series R Clustering implementation . - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are - optimized for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will - run with different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - n_jobs : int, default=1 - The number of jobs to run in parallel for `transform`. ``-1`` - means using all - processors. - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - - def __init__(self, - num_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state: Optional[Union[int, RandomState]] = None, - max_iter=300, - n_jobs=-1): + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are + optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will + run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` + means using all + processors. + + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + + def __init__( + self, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300, + n_jobs=-1, + ): self.num_cluster = n_clusters self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.mini_rocket = MiniRocket(n_kernels=num_kernels, - max_dilations_per_kernel=max_dilations_per_kernel, - n_jobs=n_jobs) + self.mini_rocket = MiniRocket( + n_kernels=num_kernels, + max_dilations_per_kernel=max_dilations_per_kernel, + n_jobs=n_jobs, + ) self.fit = False super().__init__() @@ -93,13 +100,16 @@ def _fit(self, X, y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter, ) + max_iter=self.max_iter, + ) self._r_cluster.fit(transformed_data_pca) self.fit = True def _predict(self, X, y=None) -> np.ndarray: if not self.fit: - raise ValueError("Data is not fitted. Please fit the model before using it.") + raise ValueError( + "Data is not fitted. Please fit the model before using it." + ) self.mini_rocket.fit(X=X) transformed_data = self.mini_rocket.transform(X=X) @@ -130,5 +140,6 @@ def _fit_predict(self, X, y=None) -> np.ndarray: n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) - return self._r_cluster.fit_predict(transformed_data_pca) \ No newline at end of file + max_iter=self.max_iter, + ) + return self._r_cluster.fit_predict(transformed_data_pca) From afa22e65ac846101f2312e709749d97fa66ddcad Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:20:33 +0530 Subject: [PATCH 017/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index d650486801..197e90c2d2 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -25,8 +25,8 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales + Dilations control the spacing of the kernel's receptive field over the + time series, capturing patterns at varying scales num_features : int , default = 500 The number of features extracted per kernel after applying the transformation From 5fa4c6e724193c2867907f58db5c9aed8784e3cf Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Wed, 27 Nov 2024 09:51:08 +0000 Subject: [PATCH 018/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 197e90c2d2..25c08bc0d6 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -25,7 +25,7 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the + Dilations control the spacing of the kernel's receptive field over the time series, capturing patterns at varying scales num_features : int , default = 500 From 00e8b89b02e6a4f876c4d98cdd0a32aad53c5803 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Wed, 27 Nov 2024 19:59:47 +0530 Subject: [PATCH 019/103] used and modified aeon mini rocket --- aeon/clustering/_r_cluster.py | 97 ++++++++++++++++++++++--- aeon/clustering/tests/test_r_cluster.py | 34 +++++++++ 2 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 aeon/clustering/tests/test_r_cluster.py diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 50ece8a9bf..a168cd398e 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -5,8 +5,12 @@ from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler -from aeon.transformations.collection.convolution_based import MiniRocket + from aeon.clustering.base import BaseClusterer +from aeon.transformations.collection.convolution_based._minirocket import _fit_dilations,_quantiles,_fit_biases,_static_transform_uni,_static_transform_multi +from aeon.datasets import load_basic_motions +import multiprocessing +from numba import get_num_threads, set_num_threads class RCluster(BaseClusterer): """Time series R Clustering implementation . @@ -65,19 +69,90 @@ def __init__(self, random_state: Optional[Union[int, RandomState]] = None, max_iter=300, n_jobs=-1): + self.n_jobs = n_jobs + self.n_kernels = num_kernels + self.max_dilations_per_kernel = max_dilations_per_kernel self.num_cluster = n_clusters self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.mini_rocket = MiniRocket(n_kernels=num_kernels, - max_dilations_per_kernel=max_dilations_per_kernel, - n_jobs=n_jobs) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.fit = False super().__init__() + def __get_parameterised_data(self,X): + _, n_channels, n_timepoints = X.shape + X = X.astype(np.float32) + + dilations, num_features_per_dilation = _fit_dilations(n_timepoints, self.n_kernels , self.max_dilations_per_kernel) + + num_features_per_kernel = np.sum(num_features_per_dilation) + + quantiles = _quantiles(self.n_kernels * num_features_per_kernel) + + #MODIFICATION + quantiles = np.random.permutation(quantiles) + + n_dilations = len(dilations) + n_combinations = self.n_kernels * n_dilations + max_n_channels = min(n_channels, 9) + max_exponent = np.log2(max_n_channels + 1) + n_channels_per_combination = ( + 2 ** np.random.uniform(0, max_exponent, n_combinations) + ).astype(np.int32) + channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) + n_channels_start = 0 + for combination_index in range(n_combinations): + n_channels_this_combination = n_channels_per_combination[combination_index] + n_channels_end = n_channels_start + n_channels_this_combination + channel_indices[n_channels_start:n_channels_end] = np.random.choice( + n_channels, n_channels_this_combination, replace=False + ) + n_channels_start = n_channels_end + + biases = _fit_biases(X, + n_channels_per_combination, + channel_indices, + dilations, + num_features_per_dilation, + quantiles, + self.indices, + self.random_state,) + + return (np.array([_],dtype=np.int32),np.array([_],dtype=np.int32), dilations, num_features_per_dilation, biases) + def __get_transformed_data(self,X): + X = X.astype(np.float32) + _, n_channels, n_timepoints = X.shape + prev_threads = get_num_threads() + if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): + n_jobs = multiprocessing.cpu_count() + else: + n_jobs = self.n_jobs + set_num_threads(n_jobs) + if n_channels == 1: + X = X.squeeze(1) + X_ = _static_transform_uni(X, self.parameters, self.indices) + else: + X_ = _static_transform_multi(X, self.parameters, self.indices) + set_num_threads(prev_threads) + return X_ def _fit(self, X, y=None): - self.mini_rocket.fit(X=X) - transformed_data = self.mini_rocket.transform(X=X) + self.parameters = self.__get_parameterised_data(X) + + transformed_data = self.__get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -101,8 +176,9 @@ def _predict(self, X, y=None) -> np.ndarray: if not self.fit: raise ValueError("Data is not fitted. Please fit the model before using it.") - self.mini_rocket.fit(X=X) - transformed_data = self.mini_rocket.transform(X=X) + self.parameters = self.__get_parameterised_data(X) + + transformed_data = self.__get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -113,8 +189,9 @@ def _predict(self, X, y=None) -> np.ndarray: return self._r_cluster.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - self.mini_rocket.fit(X=X) - transformed_data = self.mini_rocket.transform(X=X) + self.parameters = self.__get_parameterised_data(X) + + transformed_data = self.__get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) diff --git a/aeon/clustering/tests/test_r_cluster.py b/aeon/clustering/tests/test_r_cluster.py new file mode 100644 index 0000000000..716c2df044 --- /dev/null +++ b/aeon/clustering/tests/test_r_cluster.py @@ -0,0 +1,34 @@ +"""Tests for time series R cluster.""" + +import numpy as np +import pytest + +from aeon.clustering._r_cluster import RCluster +from aeon.datasets import load_basic_motions +from aeon.utils.validation._dependencies import _check_estimator_deps + +expected_labels = [0, 2, 1, 2, 0] + +expected_iters = 2 + +expected_results = [0, 0, 0, 0, 0] + +@pytest.mark.skipif( + not _check_estimator_deps( RCluster, severity="none"), + reason="skip test if required soft dependencies not available", +) +def test_kernel_k_means(): + """Test implementation of R cluster.""" + max_train = 5 + + X_train, y_train = load_basic_motions(split="train") + X_test, y_test = load_basic_motions(split="test") + + r_cluster = RCluster( n_clusters=2) + r_cluster.fit(X_train[0:max_train]) + test_shape_result = r_cluster.predict(X_test[0:max_train]) + + + assert np.array_equal(test_shape_result, expected_results) + assert r_cluster.n_iter_ == expected_iters + assert np.array_equal( r_cluster.labels_, expected_labels) From b07777dcb5ef837ed166087a376c7f6512553c0e Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Wed, 27 Nov 2024 14:32:19 +0000 Subject: [PATCH 020/103] Automatic `pre-commit` fixes --- aeon/clustering/tests/test_r_cluster.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/tests/test_r_cluster.py b/aeon/clustering/tests/test_r_cluster.py index 716c2df044..1a8778a1fd 100644 --- a/aeon/clustering/tests/test_r_cluster.py +++ b/aeon/clustering/tests/test_r_cluster.py @@ -13,8 +13,9 @@ expected_results = [0, 0, 0, 0, 0] + @pytest.mark.skipif( - not _check_estimator_deps( RCluster, severity="none"), + not _check_estimator_deps(RCluster, severity="none"), reason="skip test if required soft dependencies not available", ) def test_kernel_k_means(): @@ -24,11 +25,10 @@ def test_kernel_k_means(): X_train, y_train = load_basic_motions(split="train") X_test, y_test = load_basic_motions(split="test") - r_cluster = RCluster( n_clusters=2) + r_cluster = RCluster(n_clusters=2) r_cluster.fit(X_train[0:max_train]) - test_shape_result = r_cluster.predict(X_test[0:max_train]) - + test_shape_result = r_cluster.predict(X_test[0:max_train]) assert np.array_equal(test_shape_result, expected_results) assert r_cluster.n_iter_ == expected_iters - assert np.array_equal( r_cluster.labels_, expected_labels) + assert np.array_equal(r_cluster.labels_, expected_labels) From 8692ca030120c9f6beae1fc81bee0e85afdcf901 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Wed, 27 Nov 2024 20:04:34 +0530 Subject: [PATCH 021/103] used and modified aeon mini rocket --- aeon/clustering/_r_cluster.py | 224 ++++++++++++++++++++++------------ 1 file changed, 145 insertions(+), 79 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 25c08bc0d6..c63e47b453 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,90 +1,158 @@ -from typing import Optional, Union - import numpy as np from numpy.random import RandomState + +from typing import Optional, Union from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer -from aeon.transformations.collection.convolution_based import MiniRocket - +from aeon.transformations.collection.convolution_based._minirocket import _fit_dilations,_quantiles,_fit_biases,_static_transform_uni,_static_transform_multi +from aeon.datasets import load_basic_motions +import multiprocessing +from numba import get_num_threads, set_num_threads class RCluster(BaseClusterer): """Time series R Clustering implementation . - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are - optimized for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the - time series, capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will - run with different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - n_jobs : int, default=1 - The number of jobs to run in parallel for `transform`. ``-1`` - means using all - processors. - - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - - def __init__( - self, - num_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state: Optional[Union[int, RandomState]] = None, - max_iter=300, - n_jobs=-1, - ): + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are + optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will + run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` + means using all + processors. + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + + def __init__(self, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300, + n_jobs=-1): + self.n_jobs = n_jobs + self.n_kernels = num_kernels + self.max_dilations_per_kernel = max_dilations_per_kernel self.num_cluster = n_clusters self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.mini_rocket = MiniRocket( - n_kernels=num_kernels, - max_dilations_per_kernel=max_dilations_per_kernel, - n_jobs=n_jobs, - ) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.fit = False super().__init__() + def __get_parameterised_data(self,X): + _, n_channels, n_timepoints = X.shape + X = X.astype(np.float32) + + dilations, num_features_per_dilation = _fit_dilations(n_timepoints, self.n_kernels , self.max_dilations_per_kernel) + + num_features_per_kernel = np.sum(num_features_per_dilation) + + quantiles = _quantiles(self.n_kernels * num_features_per_kernel) + + #MODIFICATION + quantiles = np.random.permutation(quantiles) + + n_dilations = len(dilations) + n_combinations = self.n_kernels * n_dilations + max_n_channels = min(n_channels, 9) + max_exponent = np.log2(max_n_channels + 1) + n_channels_per_combination = ( + 2 ** np.random.uniform(0, max_exponent, n_combinations) + ).astype(np.int32) + channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) + n_channels_start = 0 + for combination_index in range(n_combinations): + n_channels_this_combination = n_channels_per_combination[combination_index] + n_channels_end = n_channels_start + n_channels_this_combination + channel_indices[n_channels_start:n_channels_end] = np.random.choice( + n_channels, n_channels_this_combination, replace=False + ) + n_channels_start = n_channels_end + + biases = _fit_biases(X, + n_channels_per_combination, + channel_indices, + dilations, + num_features_per_dilation, + quantiles, + self.indices, + self.random_state,) + + return (np.array([_],dtype=np.int32),np.array([_],dtype=np.int32), dilations, num_features_per_dilation, biases) + def __get_transformed_data(self,X): + X = X.astype(np.float32) + _, n_channels, n_timepoints = X.shape + prev_threads = get_num_threads() + if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): + n_jobs = multiprocessing.cpu_count() + else: + n_jobs = self.n_jobs + set_num_threads(n_jobs) + if n_channels == 1: + X = X.squeeze(1) + X_ = _static_transform_uni(X, self.parameters, self.indices) + else: + X_ = _static_transform_multi(X, self.parameters, self.indices) + set_num_threads(prev_threads) + return X_ def _fit(self, X, y=None): - self.mini_rocket.fit(X=X) - transformed_data = self.mini_rocket.transform(X=X) + self.parameters = self.__get_parameterised_data(X) + + transformed_data = self.__get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -100,19 +168,17 @@ def _fit(self, X, y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter, - ) + max_iter=self.max_iter, ) self._r_cluster.fit(transformed_data_pca) self.fit = True def _predict(self, X, y=None) -> np.ndarray: if not self.fit: - raise ValueError( - "Data is not fitted. Please fit the model before using it." - ) + raise ValueError("Data is not fitted. Please fit the model before using it.") - self.mini_rocket.fit(X=X) - transformed_data = self.mini_rocket.transform(X=X) + self.parameters = self.__get_parameterised_data(X) + + transformed_data = self.__get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -123,8 +189,9 @@ def _predict(self, X, y=None) -> np.ndarray: return self._r_cluster.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - self.mini_rocket.fit(X=X) - transformed_data = self.mini_rocket.transform(X=X) + self.parameters = self.__get_parameterised_data(X) + + transformed_data = self.__get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -140,6 +207,5 @@ def _fit_predict(self, X, y=None) -> np.ndarray: n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter, - ) + max_iter=self.max_iter) return self._r_cluster.fit_predict(transformed_data_pca) From 6bfc0a8a60eace109d711ebb387f3425c7130242 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Wed, 27 Nov 2024 14:36:14 +0000 Subject: [PATCH 022/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 451 +++++++++++++++++++++++++++------- 1 file changed, 360 insertions(+), 91 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index c63e47b453..a13fd7ded6 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,74 +1,84 @@ +import multiprocessing +from typing import Optional, Union + import numpy as np +from numba import get_num_threads, set_num_threads from numpy.random import RandomState - -from typing import Optional, Union from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer -from aeon.transformations.collection.convolution_based._minirocket import _fit_dilations,_quantiles,_fit_biases,_static_transform_uni,_static_transform_multi from aeon.datasets import load_basic_motions -import multiprocessing -from numba import get_num_threads, set_num_threads +from aeon.transformations.collection.convolution_based._minirocket import ( + _fit_biases, + _fit_dilations, + _quantiles, + _static_transform_multi, + _static_transform_uni, +) + class RCluster(BaseClusterer): """Time series R Clustering implementation . - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are - optimized for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will - run with different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - n_jobs : int, default=1 - The number of jobs to run in parallel for `transform`. ``-1`` - means using all - processors. - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - - def __init__(self, - num_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state: Optional[Union[int, RandomState]] = None, - max_iter=300, - n_jobs=-1): + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are + optimized for computational speed and + feature diversity + + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales + + num_features : int , default = 500 + The number of features extracted per kernel after applying the transformation + + num_cluster : int , default = 8 + The number of clusters used + + n_init : int , default = 10 + The number of times the clustering algorithm (e.g., KMeans) will + run with different centroid seeds + to avoid poor local optima + + max_iter: int, default=300 + Maximum number of iterations of the k-means algorithm for a single + run. + random_state: int or np.random.RandomState instance or None, default=None + Determines random number generation for centroid initialization. + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` + means using all + processors. + + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + + def __init__( + self, + num_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + n_init=10, + random_state: Optional[Union[int, RandomState]] = None, + max_iter=300, + n_jobs=-1, + ): self.n_jobs = n_jobs self.n_kernels = num_kernels self.max_dilations_per_kernel = max_dilations_per_kernel @@ -76,33 +86,279 @@ def __init__(self, self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.fit = False super().__init__() - def __get_parameterised_data(self,X): + + def __get_parameterised_data(self, X): _, n_channels, n_timepoints = X.shape X = X.astype(np.float32) - dilations, num_features_per_dilation = _fit_dilations(n_timepoints, self.n_kernels , self.max_dilations_per_kernel) + dilations, num_features_per_dilation = _fit_dilations( + n_timepoints, self.n_kernels, self.max_dilations_per_kernel + ) num_features_per_kernel = np.sum(num_features_per_dilation) quantiles = _quantiles(self.n_kernels * num_features_per_kernel) - #MODIFICATION + # MODIFICATION quantiles = np.random.permutation(quantiles) n_dilations = len(dilations) @@ -110,7 +366,7 @@ def __get_parameterised_data(self,X): max_n_channels = min(n_channels, 9) max_exponent = np.log2(max_n_channels + 1) n_channels_per_combination = ( - 2 ** np.random.uniform(0, max_exponent, n_combinations) + 2 ** np.random.uniform(0, max_exponent, n_combinations) ).astype(np.int32) channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) n_channels_start = 0 @@ -122,17 +378,26 @@ def __get_parameterised_data(self,X): ) n_channels_start = n_channels_end - biases = _fit_biases(X, - n_channels_per_combination, - channel_indices, - dilations, - num_features_per_dilation, - quantiles, - self.indices, - self.random_state,) - - return (np.array([_],dtype=np.int32),np.array([_],dtype=np.int32), dilations, num_features_per_dilation, biases) - def __get_transformed_data(self,X): + biases = _fit_biases( + X, + n_channels_per_combination, + channel_indices, + dilations, + num_features_per_dilation, + quantiles, + self.indices, + self.random_state, + ) + + return ( + np.array([_], dtype=np.int32), + np.array([_], dtype=np.int32), + dilations, + num_features_per_dilation, + biases, + ) + + def __get_transformed_data(self, X): X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape prev_threads = get_num_threads() @@ -168,13 +433,16 @@ def _fit(self, X, y=None): n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter, ) + max_iter=self.max_iter, + ) self._r_cluster.fit(transformed_data_pca) self.fit = True def _predict(self, X, y=None) -> np.ndarray: if not self.fit: - raise ValueError("Data is not fitted. Please fit the model before using it.") + raise ValueError( + "Data is not fitted. Please fit the model before using it." + ) self.parameters = self.__get_parameterised_data(X) @@ -207,5 +475,6 @@ def _fit_predict(self, X, y=None) -> np.ndarray: n_clusters=self.num_cluster, n_init=self.n_init, random_state=self.random_state, - max_iter=self.max_iter) + max_iter=self.max_iter, + ) return self._r_cluster.fit_predict(transformed_data_pca) From 4670e5150a249712dbee3f983c1ee3a1f3981b27 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:07:58 +0530 Subject: [PATCH 023/103] Update test_r_cluster.py --- aeon/clustering/tests/test_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/tests/test_r_cluster.py b/aeon/clustering/tests/test_r_cluster.py index 1a8778a1fd..4b1f0e73ab 100644 --- a/aeon/clustering/tests/test_r_cluster.py +++ b/aeon/clustering/tests/test_r_cluster.py @@ -18,7 +18,7 @@ not _check_estimator_deps(RCluster, severity="none"), reason="skip test if required soft dependencies not available", ) -def test_kernel_k_means(): +def test_r_cluster(): """Test implementation of R cluster.""" max_train = 5 From db1cf12098a533cedadf2ffa5df9699fe99ea7a8 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:08:22 +0530 Subject: [PATCH 024/103] Update test_r_cluster.py --- aeon/clustering/tests/test_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/tests/test_r_cluster.py b/aeon/clustering/tests/test_r_cluster.py index 4b1f0e73ab..69aaa8e614 100644 --- a/aeon/clustering/tests/test_r_cluster.py +++ b/aeon/clustering/tests/test_r_cluster.py @@ -26,8 +26,8 @@ def test_r_cluster(): X_test, y_test = load_basic_motions(split="test") r_cluster = RCluster(n_clusters=2) - r_cluster.fit(X_train[0:max_train]) - test_shape_result = r_cluster.predict(X_test[0:max_train]) + r_cluster._fit(X_train[0:max_train]) + test_shape_result = r_cluster._predict(X_test[0:max_train]) assert np.array_equal(test_shape_result, expected_results) assert r_cluster.n_iter_ == expected_iters From 8a6c626d320db2b28647573545c64318b8846167 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Wed, 27 Nov 2024 20:12:33 +0530 Subject: [PATCH 025/103] used and modified aeon mini rocket --- aeon/clustering/tests/test_r_cluster.py | 34 ------------------------- 1 file changed, 34 deletions(-) delete mode 100644 aeon/clustering/tests/test_r_cluster.py diff --git a/aeon/clustering/tests/test_r_cluster.py b/aeon/clustering/tests/test_r_cluster.py deleted file mode 100644 index 69aaa8e614..0000000000 --- a/aeon/clustering/tests/test_r_cluster.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Tests for time series R cluster.""" - -import numpy as np -import pytest - -from aeon.clustering._r_cluster import RCluster -from aeon.datasets import load_basic_motions -from aeon.utils.validation._dependencies import _check_estimator_deps - -expected_labels = [0, 2, 1, 2, 0] - -expected_iters = 2 - -expected_results = [0, 0, 0, 0, 0] - - -@pytest.mark.skipif( - not _check_estimator_deps(RCluster, severity="none"), - reason="skip test if required soft dependencies not available", -) -def test_r_cluster(): - """Test implementation of R cluster.""" - max_train = 5 - - X_train, y_train = load_basic_motions(split="train") - X_test, y_test = load_basic_motions(split="test") - - r_cluster = RCluster(n_clusters=2) - r_cluster._fit(X_train[0:max_train]) - test_shape_result = r_cluster._predict(X_test[0:max_train]) - - assert np.array_equal(test_shape_result, expected_results) - assert r_cluster.n_iter_ == expected_iters - assert np.array_equal(r_cluster.labels_, expected_labels) From 1647cd8b690c26bd6bb00377618fa533d14bc014 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:15:32 +0530 Subject: [PATCH 026/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a13fd7ded6..f037166552 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -9,7 +9,6 @@ from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer -from aeon.datasets import load_basic_motions from aeon.transformations.collection.convolution_based._minirocket import ( _fit_biases, _fit_dilations, From ea8c2d9126c5a0b0487837d8201c0324fd66999e Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:34:25 +0530 Subject: [PATCH 027/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index f037166552..e8e980d9b7 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -33,8 +33,8 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales + Dilations control the spacing of the kernel's receptive field over + the time series,capturing patterns at varying scales num_features : int , default = 500 The number of features extracted per kernel after applying the transformation From a959a78b8f945447d2cd735fe98735bb30a2a974 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Wed, 27 Nov 2024 15:05:00 +0000 Subject: [PATCH 028/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index e8e980d9b7..47deecc695 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -33,7 +33,7 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over + Dilations control the spacing of the kernel's receptive field over the time series,capturing patterns at varying scales num_features : int , default = 500 From 3ad0d519a5b38dbb9d70e966ab27931b110ad9ed Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 30 Nov 2024 00:48:54 +0530 Subject: [PATCH 029/103] updated variables name --- aeon/clustering/_r_cluster.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 47deecc695..7e53eb4149 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -9,6 +9,7 @@ from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer +from aeon.datasets import load_basic_motions from aeon.transformations.collection.convolution_based._minirocket import ( _fit_biases, _fit_dilations, @@ -33,8 +34,8 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over - the time series,capturing patterns at varying scales + Dilations control the spacing of the kernel's receptive field over the time series, + capturing patterns at varying scales num_features : int , default = 500 The number of features extracted per kernel after applying the transformation @@ -70,7 +71,7 @@ class RCluster(BaseClusterer): def __init__( self, - num_kernels=84, + n_kernels=84, max_dilations_per_kernel=32, n_clusters=8, n_init=10, @@ -79,9 +80,9 @@ def __init__( n_jobs=-1, ): self.n_jobs = n_jobs - self.n_kernels = num_kernels + self.n_kernels = n_kernels self.max_dilations_per_kernel = max_dilations_per_kernel - self.num_cluster = n_clusters + self.n_clusters = n_clusters self.n_init = n_init self.random_state = random_state self.max_iter = max_iter @@ -345,7 +346,7 @@ def __init__( self.fit = False super().__init__() - def __get_parameterised_data(self, X): + def _get_parameterised_data(self, X): _, n_channels, n_timepoints = X.shape X = X.astype(np.float32) @@ -396,7 +397,7 @@ def __get_parameterised_data(self, X): biases, ) - def __get_transformed_data(self, X): + def _get_transformed_data(self, X): X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape prev_threads = get_num_threads() @@ -414,9 +415,9 @@ def __get_transformed_data(self, X): return X_ def _fit(self, X, y=None): - self.parameters = self.__get_parameterised_data(X) + self.parameters = self._get_parameterised_data(X) - transformed_data = self.__get_transformed_data(X=X) + transformed_data = self._get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -429,7 +430,7 @@ def _fit(self, X, y=None): transformed_data_pca = pca_optimal.fit_transform(X_std) self._r_cluster = KMeans( - n_clusters=self.num_cluster, + n_clusters=self.n_clusters, n_init=self.n_init, random_state=self.random_state, max_iter=self.max_iter, @@ -443,9 +444,9 @@ def _predict(self, X, y=None) -> np.ndarray: "Data is not fitted. Please fit the model before using it." ) - self.parameters = self.__get_parameterised_data(X) + self.parameters = self._get_parameterised_data(X) - transformed_data = self.__get_transformed_data(X=X) + transformed_data = self._get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -456,9 +457,9 @@ def _predict(self, X, y=None) -> np.ndarray: return self._r_cluster.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - self.parameters = self.__get_parameterised_data(X) + self.parameters = self._get_parameterised_data(X) - transformed_data = self.__get_transformed_data(X=X) + transformed_data = self._get_transformed_data(X=X) sc = StandardScaler() X_std = sc.fit_transform(transformed_data) @@ -471,7 +472,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: transformed_data_pca = pca_optimal.fit_transform(X_std) self._r_cluster = KMeans( - n_clusters=self.num_cluster, + n_clusters=self.n_clusters, n_init=self.n_init, random_state=self.random_state, max_iter=self.max_iter, From 747c84aa8547c9d841a7241b2f9418f7a6f30fc4 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 30 Nov 2024 00:55:23 +0530 Subject: [PATCH 030/103] updated r_cluster --- aeon/clustering/_r_cluster.py | 272 ++-------------------------------- 1 file changed, 14 insertions(+), 258 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 7e53eb4149..cbd3ff21d1 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -9,7 +9,6 @@ from sklearn.preprocessing import StandardScaler from aeon.clustering.base import BaseClusterer -from aeon.datasets import load_basic_motions from aeon.transformations.collection.convolution_based._minirocket import ( _fit_biases, _fit_dilations, @@ -86,263 +85,20 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.fit = False super().__init__() From edff88e6605cdd931a970e51b225d0f683a5219c Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 29 Nov 2024 19:26:34 +0000 Subject: [PATCH 031/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 271 ++++++++++++++++++++++++++++++++-- 1 file changed, 257 insertions(+), 14 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index cbd3ff21d1..96ae3fef3e 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -85,20 +85,263 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.fit = False super().__init__() From 6a0eedbcbcb09b980f302bda87bd0e07f721ca32 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 00:59:33 +0530 Subject: [PATCH 032/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 96ae3fef3e..25d803a722 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -33,8 +33,8 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over the time series, - capturing patterns at varying scales + Dilations control the spacing of the kernel's receptive field over + the time series,capturing patterns at varying scales num_features : int , default = 500 The number of features extracted per kernel after applying the transformation From 7d6af3b91e3054c04ee9550985c904e0eb15ed57 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 29 Nov 2024 19:30:02 +0000 Subject: [PATCH 033/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 25d803a722..b4b0e16043 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -33,7 +33,7 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over + Dilations control the spacing of the kernel's receptive field over the time series,capturing patterns at varying scales num_features : int , default = 500 From 94cdc116336af910437b7214390881387ab18b7a Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 30 Nov 2024 01:46:49 +0530 Subject: [PATCH 034/103] updated to fix some testing issues --- aeon/clustering/_r_cluster.py | 332 ++++++---------------------------- 1 file changed, 57 insertions(+), 275 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index b4b0e16043..32abdb6aaf 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -33,11 +33,12 @@ class RCluster(BaseClusterer): max_dilations_per_kernel : int , default = 32 The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field over - the time series,capturing patterns at varying scales + Dilations control the spacing of the kernel's receptive field + over the time series,capturing patterns at varying scales num_features : int , default = 500 - The number of features extracted per kernel after applying the transformation + The number of features extracted per kernel after applying + the transformation num_cluster : int , default = 8 The number of clusters used @@ -67,6 +68,10 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ + _tags = { + "capability:multivariate": True, + "capability: multithreading": True + } def __init__( self, @@ -85,265 +90,28 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) - self.fit = False + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) + self.is_fitted = False super().__init__() + self._r_cluster = KMeans( + n_clusters=self.n_clusters, + n_init=self.n_init, + random_state=self.random_state, + max_iter=self.max_iter, + ) def _get_parameterised_data(self, X): _, n_channels, n_timepoints = X.shape @@ -428,17 +196,11 @@ def _fit(self, X, y=None): pca_optimal = PCA(n_components=self.optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) - self._r_cluster = KMeans( - n_clusters=self.n_clusters, - n_init=self.n_init, - random_state=self.random_state, - max_iter=self.max_iter, - ) self._r_cluster.fit(transformed_data_pca) - self.fit = True + self.is_fitted = True def _predict(self, X, y=None) -> np.ndarray: - if not self.fit: + if not self.is_fitted: raise ValueError( "Data is not fitted. Please fit the model before using it." ) @@ -470,10 +232,30 @@ def _fit_predict(self, X, y=None) -> np.ndarray: pca_optimal = PCA(n_components=optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) - self._r_cluster = KMeans( - n_clusters=self.n_clusters, - n_init=self.n_init, - random_state=self.random_state, - max_iter=self.max_iter, - ) return self._r_cluster.fit_predict(transformed_data_pca) + + @classmethod + def _get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + return { + "n_clusters": 2, + "init": "random", + "n_init": 1, + "max_iter": 1, + "random_state": 1, + } \ No newline at end of file From 12d759123146e22743c51a2a5158013be00fe883 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 29 Nov 2024 20:17:28 +0000 Subject: [PATCH 035/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 279 +++++++++++++++++++++++++++++++--- 1 file changed, 260 insertions(+), 19 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 32abdb6aaf..30bea672ca 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,10 +68,8 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - _tags = { - "capability:multivariate": True, - "capability: multithreading": True - } + + _tags = {"capability:multivariate": True, "capability: multithreading": True} def __init__( self, @@ -90,20 +88,263 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -258,4 +499,4 @@ def _get_test_params(cls, parameter_set="default"): "n_init": 1, "max_iter": 1, "random_state": 1, - } \ No newline at end of file + } From 270e3a011f8512cc345c18ddc9ca62669db27a5d Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 30 Nov 2024 01:54:51 +0530 Subject: [PATCH 036/103] updated to fix some testing issues --- aeon/clustering/_r_cluster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 32abdb6aaf..4b8c91708e 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -254,7 +254,6 @@ def _get_test_params(cls, parameter_set="default"): """ return { "n_clusters": 2, - "init": "random", "n_init": 1, "max_iter": 1, "random_state": 1, From 21927dd5fc0d8cbe780a3fb0209ced7413163090 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 30 Nov 2024 01:55:25 +0530 Subject: [PATCH 037/103] updated to fix some testing issues --- aeon/clustering/_r_cluster.py | 279 +++------------------------------- 1 file changed, 19 insertions(+), 260 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index f94d204eb7..4b8c91708e 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,8 +68,10 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - - _tags = {"capability:multivariate": True, "capability: multithreading": True} + _tags = { + "capability:multivariate": True, + "capability: multithreading": True + } def __init__( self, @@ -88,263 +90,20 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -498,4 +257,4 @@ def _get_test_params(cls, parameter_set="default"): "n_init": 1, "max_iter": 1, "random_state": 1, - } + } \ No newline at end of file From 7772527fb0af1437f0a512f58df00eeea86f050e Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 01:56:16 +0530 Subject: [PATCH 038/103] Update _r_cluster.py to fix some issues --- aeon/clustering/_r_cluster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 30bea672ca..f94d204eb7 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -495,7 +495,6 @@ def _get_test_params(cls, parameter_set="default"): """ return { "n_clusters": 2, - "init": "random", "n_init": 1, "max_iter": 1, "random_state": 1, From e62b0dd1fd9af89305cb8b6713336f79370eee09 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 30 Nov 2024 02:05:45 +0530 Subject: [PATCH 039/103] updated to fix some testing issues --- aeon/clustering/_r_cluster.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 4b8c91708e..c308569534 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -70,7 +70,7 @@ class RCluster(BaseClusterer): """ _tags = { "capability:multivariate": True, - "capability: multithreading": True + "capability:multithreading": True } def __init__( @@ -232,29 +232,4 @@ def _fit_predict(self, X, y=None) -> np.ndarray: pca_optimal = PCA(n_components=optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) - return self._r_cluster.fit_predict(transformed_data_pca) - - @classmethod - def _get_test_params(cls, parameter_set="default"): - """Return testing parameter settings for the estimator. - - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - - Returns - ------- - params : dict or list of dict, default={} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - """ - return { - "n_clusters": 2, - "n_init": 1, - "max_iter": 1, - "random_state": 1, - } \ No newline at end of file + return self._r_cluster.fit_predict(transformed_data_pca) \ No newline at end of file From 3d9df3d9ceb8fb4623db2aeea3bec698961a4a90 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 02:06:52 +0530 Subject: [PATCH 040/103] Update _r_cluster.py for fixing test issues --- aeon/clustering/_r_cluster.py | 302 ++-------------------------------- 1 file changed, 18 insertions(+), 284 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index f94d204eb7..a3ef6dc987 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,8 +68,10 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - - _tags = {"capability:multivariate": True, "capability: multithreading": True} + _tags = { + "capability:multivariate": True, + "capability:multithreading": True + } def __init__( self, @@ -88,263 +90,20 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -474,28 +233,3 @@ def _fit_predict(self, X, y=None) -> np.ndarray: transformed_data_pca = pca_optimal.fit_transform(X_std) return self._r_cluster.fit_predict(transformed_data_pca) - - @classmethod - def _get_test_params(cls, parameter_set="default"): - """Return testing parameter settings for the estimator. - - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - - Returns - ------- - params : dict or list of dict, default={} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - """ - return { - "n_clusters": 2, - "n_init": 1, - "max_iter": 1, - "random_state": 1, - } From 10e86221d7368f894f3be64694bace4cf91724d8 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 29 Nov 2024 20:37:26 +0000 Subject: [PATCH 041/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 277 +++++++++++++++++++++++++++++++--- 1 file changed, 259 insertions(+), 18 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a3ef6dc987..e2ef0f4dbc 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,10 +68,8 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - _tags = { - "capability:multivariate": True, - "capability:multithreading": True - } + + _tags = {"capability:multivariate": True, "capability:multithreading": True} def __init__( self, @@ -90,20 +88,263 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( From e76b667b10c2c8805ada4afaecbbb76a1dfdaf85 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 03:56:18 +0530 Subject: [PATCH 042/103] updating to fix test failures --- aeon/clustering/_r_cluster.py | 291 ++++------------------------------ 1 file changed, 27 insertions(+), 264 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index e2ef0f4dbc..a416dfff1f 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,8 +68,10 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - - _tags = {"capability:multivariate": True, "capability:multithreading": True} + _tags = { + "capability:multivariate": True, + "capability:multithreading": True + } def __init__( self, @@ -79,7 +81,7 @@ def __init__( n_init=10, random_state: Optional[Union[int, RandomState]] = None, max_iter=300, - n_jobs=-1, + n_jobs=1, ): self.n_jobs = n_jobs self.n_kernels = n_kernels @@ -88,263 +90,20 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -432,9 +191,13 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) - self.optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - - pca_optimal = PCA(n_components=self.optimal_dimensions) + self.__optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) + self.__optimal_dimensions = min(self.__optimal_dimensions, X.shape[0], X.shape[1]) + if(self.__optimal_dimensions==0): + raise ValueError( + f"Optimal dimensions {self.__optimal_dimensions} must be greater than 0." + ) + pca_optimal = PCA(n_components=self.__optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) self._r_cluster.fit(transformed_data_pca) @@ -453,7 +216,7 @@ def _predict(self, X, y=None) -> np.ndarray: sc = StandardScaler() X_std = sc.fit_transform(transformed_data) - pca_optimal = PCA(n_components=self.optimal_dimensions) + pca_optimal = PCA(n_components=self.__optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) return self._r_cluster.predict(transformed_data_pca) From 7544c653b085079d540e7b316f41d24003f999c1 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 29 Nov 2024 22:26:51 +0000 Subject: [PATCH 043/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 283 +++++++++++++++++++++++++++++++--- 1 file changed, 263 insertions(+), 20 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a416dfff1f..a5c47d0ae2 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,10 +68,8 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - _tags = { - "capability:multivariate": True, - "capability:multithreading": True - } + + _tags = {"capability:multivariate": True, "capability:multithreading": True} def __init__( self, @@ -90,20 +88,263 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -192,8 +433,10 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) self.__optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - self.__optimal_dimensions = min(self.__optimal_dimensions, X.shape[0], X.shape[1]) - if(self.__optimal_dimensions==0): + self.__optimal_dimensions = min( + self.__optimal_dimensions, X.shape[0], X.shape[1] + ) + if self.__optimal_dimensions == 0: raise ValueError( f"Optimal dimensions {self.__optimal_dimensions} must be greater than 0." ) From b7a52cca395aa85a515201f3d9be00fb382be739 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 04:07:51 +0530 Subject: [PATCH 044/103] updating to fix test failures --- aeon/clustering/_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index a5c47d0ae2..ee2377ab32 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -437,8 +437,8 @@ def _fit(self, X, y=None): self.__optimal_dimensions, X.shape[0], X.shape[1] ) if self.__optimal_dimensions == 0: - raise ValueError( - f"Optimal dimensions {self.__optimal_dimensions} must be greater than 0." + raise ValueError( + f"Optimal dimensions must be greater than 0." ) pca_optimal = PCA(n_components=self.__optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) From 2f37e10a7fb868deb670e7f6570a6238f298c0a1 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 29 Nov 2024 22:38:18 +0000 Subject: [PATCH 045/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index ee2377ab32..397b6e4b19 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -437,9 +437,7 @@ def _fit(self, X, y=None): self.__optimal_dimensions, X.shape[0], X.shape[1] ) if self.__optimal_dimensions == 0: - raise ValueError( - f"Optimal dimensions must be greater than 0." - ) + raise ValueError(f"Optimal dimensions must be greater than 0.") pca_optimal = PCA(n_components=self.__optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) From b1421d9f47251e92f3a35dd6ac50b4427623ea06 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 04:10:20 +0530 Subject: [PATCH 046/103] update to fix test issues --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 397b6e4b19..832b1e9279 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -437,7 +437,7 @@ def _fit(self, X, y=None): self.__optimal_dimensions, X.shape[0], X.shape[1] ) if self.__optimal_dimensions == 0: - raise ValueError(f"Optimal dimensions must be greater than 0.") + raise ValueError("Optimal dimensions must be greater than 0.") pca_optimal = PCA(n_components=self.__optimal_dimensions) transformed_data_pca = pca_optimal.fit_transform(X_std) From d1238da627569663fe01a37192af4484c3821094 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 17:41:43 +0530 Subject: [PATCH 047/103] Update _r_cluster.py to fix test issues --- aeon/clustering/_r_cluster.py | 337 +++++----------------------------- 1 file changed, 49 insertions(+), 288 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 832b1e9279..9660712ba5 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,8 +68,10 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - - _tags = {"capability:multivariate": True, "capability:multithreading": True} + _tags = { + "capability:multivariate": True, + "capability:multithreading": True + } def __init__( self, @@ -88,263 +90,20 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -405,7 +164,7 @@ def _get_parameterised_data(self, X): biases, ) - def _get_transformed_data(self, X): + def _get_transformed_data(self, X,parameters): X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape prev_threads = get_num_threads() @@ -416,30 +175,29 @@ def _get_transformed_data(self, X): set_num_threads(n_jobs) if n_channels == 1: X = X.squeeze(1) - X_ = _static_transform_uni(X, self.parameters, self.indices) + X_ = _static_transform_uni(X, parameters, self.indices) else: - X_ = _static_transform_multi(X, self.parameters, self.indices) + X_ = _static_transform_multi(X, parameters, self.indices) set_num_threads(prev_threads) return X_ def _fit(self, X, y=None): - self.parameters = self._get_parameterised_data(X) + parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X) + transformed_data = self._get_transformed_data(X=X,parameters=parameters) - sc = StandardScaler() - X_std = sc.fit_transform(transformed_data) + self.scaler = StandardScaler() + X_std = self.scaler.fit_transform(transformed_data) pca = PCA().fit(X_std) + optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - self.__optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - self.__optimal_dimensions = min( - self.__optimal_dimensions, X.shape[0], X.shape[1] - ) - if self.__optimal_dimensions == 0: - raise ValueError("Optimal dimensions must be greater than 0.") - pca_optimal = PCA(n_components=self.__optimal_dimensions) - transformed_data_pca = pca_optimal.fit_transform(X_std) + if np.all(pca.explained_variance_ratio_ >= 0.01): + optimal_dimensions = X.shape[1] + else: + optimal_dimensions = min(optimal_dimensions, X.shape[0], X.shape[1]) + self.pca = PCA(n_components=optimal_dimensions,random_state=self.random_state) + transformed_data_pca = self.pca.fit_transform(X_std) self._r_cluster.fit(transformed_data_pca) self.is_fitted = True @@ -450,31 +208,34 @@ def _predict(self, X, y=None) -> np.ndarray: "Data is not fitted. Please fit the model before using it." ) - self.parameters = self._get_parameterised_data(X) + parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X) + transformed_data = self._get_transformed_data(X=X,parameters=parameters) - sc = StandardScaler() - X_std = sc.fit_transform(transformed_data) + X_std = self.scaler.fit_transform(transformed_data) - pca_optimal = PCA(n_components=self.__optimal_dimensions) - transformed_data_pca = pca_optimal.fit_transform(X_std) + transformed_data_pca = self.pca.fit_transform(X_std) return self._r_cluster.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - self.parameters = self._get_parameterised_data(X) + parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X) + transformed_data = self._get_transformed_data(X=X, parameters=parameters) - sc = StandardScaler() - X_std = sc.fit_transform(transformed_data) + self.scaler = StandardScaler() + X_std = self.scaler.fit_transform(transformed_data) pca = PCA().fit(X_std) - optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - pca_optimal = PCA(n_components=optimal_dimensions) - transformed_data_pca = pca_optimal.fit_transform(X_std) + if np.all(pca.explained_variance_ratio_ >= 0.01): + optimal_dimensions = X.shape[1] + else: + optimal_dimensions = min(optimal_dimensions, X.shape[0], X.shape[1]) + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + transformed_data_pca = self.pca.fit_transform(X_std) - return self._r_cluster.fit_predict(transformed_data_pca) + self._r_cluster.fit(transformed_data_pca) + self.is_fitted = True + return self._r_cluster.predict(transformed_data_pca) From d4f80fcaa919eeaaeb3d4003753ec2e755ad5724 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 30 Nov 2024 12:12:19 +0000 Subject: [PATCH 048/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 285 +++++++++++++++++++++++++++++++--- 1 file changed, 263 insertions(+), 22 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 9660712ba5..75b30162fa 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,10 +68,8 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - _tags = { - "capability:multivariate": True, - "capability:multithreading": True - } + + _tags = {"capability:multivariate": True, "capability:multithreading": True} def __init__( self, @@ -90,20 +88,263 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -164,7 +405,7 @@ def _get_parameterised_data(self, X): biases, ) - def _get_transformed_data(self, X,parameters): + def _get_transformed_data(self, X, parameters): X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape prev_threads = get_num_threads() @@ -184,7 +425,7 @@ def _get_transformed_data(self, X,parameters): def _fit(self, X, y=None): parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X,parameters=parameters) + transformed_data = self._get_transformed_data(X=X, parameters=parameters) self.scaler = StandardScaler() X_std = self.scaler.fit_transform(transformed_data) @@ -196,7 +437,7 @@ def _fit(self, X, y=None): optimal_dimensions = X.shape[1] else: optimal_dimensions = min(optimal_dimensions, X.shape[0], X.shape[1]) - self.pca = PCA(n_components=optimal_dimensions,random_state=self.random_state) + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self._r_cluster.fit(transformed_data_pca) @@ -210,7 +451,7 @@ def _predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X,parameters=parameters) + transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) From 77ffca307426dbbd1f572aca346317f78006029e Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 30 Nov 2024 18:15:19 +0530 Subject: [PATCH 049/103] Update _r_cluster.py to fix test issues --- aeon/clustering/_r_cluster.py | 318 ++++------------------------------ 1 file changed, 29 insertions(+), 289 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 75b30162fa..7c240b2832 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,8 +68,12 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ - - _tags = {"capability:multivariate": True, "capability:multithreading": True} + _tags = { + "capability:multivariate": True, + "capability:multithreading": True, + "capability:unequal_length": False, + "capability:missing_values": False, + } def __init__( self, @@ -88,263 +92,20 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -405,7 +166,7 @@ def _get_parameterised_data(self, X): biases, ) - def _get_transformed_data(self, X, parameters): + def _get_transformed_data(self, X,parameters): X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape prev_threads = get_num_threads() @@ -425,7 +186,7 @@ def _get_transformed_data(self, X, parameters): def _fit(self, X, y=None): parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X, parameters=parameters) + transformed_data = self._get_transformed_data(X=X,parameters=parameters) self.scaler = StandardScaler() X_std = self.scaler.fit_transform(transformed_data) @@ -433,11 +194,9 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - if np.all(pca.explained_variance_ratio_ >= 0.01): - optimal_dimensions = X.shape[1] - else: - optimal_dimensions = min(optimal_dimensions, X.shape[0], X.shape[1]) - self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) + + self.pca = PCA(n_components=optimal_dimensions,random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self._r_cluster.fit(transformed_data_pca) @@ -451,32 +210,13 @@ def _predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X, parameters=parameters) + transformed_data = self._get_transformed_data(X=X,parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) - - transformed_data_pca = self.pca.fit_transform(X_std) + transformed_data_pca = self.pca.transform(X_std) return self._r_cluster.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - parameters = self._get_parameterised_data(X) - - transformed_data = self._get_transformed_data(X=X, parameters=parameters) - - self.scaler = StandardScaler() - X_std = self.scaler.fit_transform(transformed_data) - - pca = PCA().fit(X_std) - optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - - if np.all(pca.explained_variance_ratio_ >= 0.01): - optimal_dimensions = X.shape[1] - else: - optimal_dimensions = min(optimal_dimensions, X.shape[0], X.shape[1]) - self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) - transformed_data_pca = self.pca.fit_transform(X_std) - - self._r_cluster.fit(transformed_data_pca) - self.is_fitted = True - return self._r_cluster.predict(transformed_data_pca) + self._fit(X, y) + return self._predict(X, y) From 28a36ac1cccb5d4df50ce0535a5499de0c1c74ff Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 30 Nov 2024 12:45:49 +0000 Subject: [PATCH 050/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 280 +++++++++++++++++++++++++++++++--- 1 file changed, 262 insertions(+), 18 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 7c240b2832..be00336823 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -68,6 +68,7 @@ class RCluster(BaseClusterer): .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ + _tags = { "capability:multivariate": True, "capability:multithreading": True, @@ -92,20 +93,263 @@ def __init__( self.n_init = n_init self.random_state = random_state self.max_iter = max_iter - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.is_fitted = False super().__init__() self._r_cluster = KMeans( @@ -166,7 +410,7 @@ def _get_parameterised_data(self, X): biases, ) - def _get_transformed_data(self, X,parameters): + def _get_transformed_data(self, X, parameters): X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape prev_threads = get_num_threads() @@ -186,7 +430,7 @@ def _get_transformed_data(self, X,parameters): def _fit(self, X, y=None): parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X,parameters=parameters) + transformed_data = self._get_transformed_data(X=X, parameters=parameters) self.scaler = StandardScaler() X_std = self.scaler.fit_transform(transformed_data) @@ -196,7 +440,7 @@ def _fit(self, X, y=None): optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) - self.pca = PCA(n_components=optimal_dimensions,random_state=self.random_state) + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self._r_cluster.fit(transformed_data_pca) @@ -210,7 +454,7 @@ def _predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X) - transformed_data = self._get_transformed_data(X=X,parameters=parameters) + transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) transformed_data_pca = self.pca.transform(X_std) From 9bda6fb9b0cf03e7158799a016d8620d812ac59b Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 1 Dec 2024 02:27:17 +0530 Subject: [PATCH 051/103] Update _r_cluster.py to fix none reproducibility --- aeon/clustering/_r_cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index be00336823..82cea94958 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -360,6 +360,7 @@ def __init__( ) def _get_parameterised_data(self, X): + np.random.seed(42) _, n_channels, n_timepoints = X.shape X = X.astype(np.float32) From 56339c7e0c1ede88a089ba17fd8cd003a13bee42 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 1 Dec 2024 02:28:42 +0530 Subject: [PATCH 052/103] Update _r_cluster.py to fix none reproducibility --- aeon/clustering/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 82cea94958..7664b5ca76 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -360,7 +360,7 @@ def __init__( ) def _get_parameterised_data(self, X): - np.random.seed(42) + np.random.seed(self.random_state) _, n_channels, n_timepoints = X.shape X = X.astype(np.float32) From 353577ed6afcd8ccb186ca93526883361cd26e27 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 1 Dec 2024 02:44:32 +0530 Subject: [PATCH 053/103] Update _r_cluster.py to add labels_ --- aeon/clustering/_r_cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 7664b5ca76..8c3c44dfef 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -445,6 +445,7 @@ def _fit(self, X, y=None): transformed_data_pca = self.pca.fit_transform(X_std) self._r_cluster.fit(transformed_data_pca) + self.labels_ = self._r_cluster.labels_ self.is_fitted = True def _predict(self, X, y=None) -> np.ndarray: From f694096fe0612bf634cda0c3979046504a62b976 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 1 Dec 2024 19:23:44 +0530 Subject: [PATCH 054/103] Update _r_cluster.py to fix test issues --- aeon/clustering/_r_cluster.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 8c3c44dfef..fd3295e3c7 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -13,7 +13,6 @@ _fit_biases, _fit_dilations, _quantiles, - _static_transform_multi, _static_transform_uni, ) @@ -70,7 +69,7 @@ class RCluster(BaseClusterer): """ _tags = { - "capability:multivariate": True, + "capability:multivariate": False, "capability:multithreading": True, "capability:unequal_length": False, "capability:missing_values": False, @@ -424,7 +423,10 @@ def _get_transformed_data(self, X, parameters): X = X.squeeze(1) X_ = _static_transform_uni(X, parameters, self.indices) else: - X_ = _static_transform_multi(X, parameters, self.indices) + raise ValueError( + "RCluster is not compatible with multivariate data." + "Please ensure the input has only one channel.") + set_num_threads(prev_threads) return X_ From e1500fd593eb0c50c8db02dd854e7afd1d07ac0f Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sun, 1 Dec 2024 13:54:16 +0000 Subject: [PATCH 055/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index fd3295e3c7..ebf6882f44 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -425,7 +425,8 @@ def _get_transformed_data(self, X, parameters): else: raise ValueError( "RCluster is not compatible with multivariate data." - "Please ensure the input has only one channel.") + "Please ensure the input has only one channel." + ) set_num_threads(prev_threads) return X_ From 4eda409f207625010436a060210bab354aa453f6 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 1 Dec 2024 20:01:14 +0530 Subject: [PATCH 056/103] Update _r_cluster.py to fix none reproducibility --- aeon/clustering/_r_cluster.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index ebf6882f44..f16e4c855c 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -1,9 +1,8 @@ import multiprocessing -from typing import Optional, Union import numpy as np from numba import get_num_threads, set_num_threads -from numpy.random import RandomState + from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler @@ -50,7 +49,7 @@ class RCluster(BaseClusterer): max_iter: int, default=300 Maximum number of iterations of the k-means algorithm for a single run. - random_state: int or np.random.RandomState instance or None, default=None + random_state: int or np.random.RandomState instance or None, default=42 Determines random number generation for centroid initialization. n_jobs : int, default=1 The number of jobs to run in parallel for `transform`. ``-1`` @@ -81,7 +80,7 @@ def __init__( max_dilations_per_kernel=32, n_clusters=8, n_init=10, - random_state: Optional[Union[int, RandomState]] = None, + random_state=42, max_iter=300, n_jobs=1, ): @@ -462,7 +461,7 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) - transformed_data_pca = self.pca.transform(X_std) + transformed_data_pca = self.pca.fit_transform(X_std) return self._r_cluster.predict(transformed_data_pca) From 14e6aedd68e44c4cfd97e3f62253aba9d0d61fb3 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sun, 1 Dec 2024 14:31:48 +0000 Subject: [PATCH 057/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index f16e4c855c..c511afcac8 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -2,7 +2,6 @@ import numpy as np from numba import get_num_threads, set_num_threads - from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler From d570365b39dd4dde59b9167a14c3c862b4f72f9e Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 1 Dec 2024 20:21:07 +0530 Subject: [PATCH 058/103] Update _r_cluster.py --- aeon/clustering/_r_cluster.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index c511afcac8..0518529474 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -348,14 +348,13 @@ def __init__( dtype=np.int32, ).reshape(84, 3) self.is_fitted = False - super().__init__() self._r_cluster = KMeans( n_clusters=self.n_clusters, n_init=self.n_init, random_state=self.random_state, max_iter=self.max_iter, ) - + super().__init__() def _get_parameterised_data(self, X): np.random.seed(self.random_state) _, n_channels, n_timepoints = X.shape From 4887502809c7db788109bda0bd9913b52d17e819 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sun, 1 Dec 2024 14:51:47 +0000 Subject: [PATCH 059/103] Automatic `pre-commit` fixes --- aeon/clustering/_r_cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py index 0518529474..777d605166 100644 --- a/aeon/clustering/_r_cluster.py +++ b/aeon/clustering/_r_cluster.py @@ -355,6 +355,7 @@ def __init__( max_iter=self.max_iter, ) super().__init__() + def _get_parameterised_data(self, X): np.random.seed(self.random_state) _, n_channels, n_timepoints = X.shape From 756f255f68b1071a4fe387b3e323b5e226498673 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 14 Dec 2024 03:50:19 +0530 Subject: [PATCH 060/103] updated code as requested by maintainer. --- aeon/clustering/__init__.py | 2 - aeon/clustering/feature_based/__init__.py | 2 + aeon/clustering/feature_based/_r_cluster.py | 227 ++++++++++++++++++ .../feature_based/tests/test_r_cluster.py | 20 ++ 4 files changed, 249 insertions(+), 2 deletions(-) create mode 100644 aeon/clustering/feature_based/_r_cluster.py create mode 100644 aeon/clustering/feature_based/tests/test_r_cluster.py diff --git a/aeon/clustering/__init__.py b/aeon/clustering/__init__.py index 8c93abf682..2eec5142cf 100644 --- a/aeon/clustering/__init__.py +++ b/aeon/clustering/__init__.py @@ -11,7 +11,6 @@ "ElasticSOM", "KSpectralCentroid", "DummyClusterer", - "RCluster", ] from aeon.clustering._clara import TimeSeriesCLARA @@ -22,6 +21,5 @@ from aeon.clustering._k_sc import KSpectralCentroid from aeon.clustering._k_shape import TimeSeriesKShape from aeon.clustering._kernel_k_means import TimeSeriesKernelKMeans -from aeon.clustering._r_cluster import RCluster from aeon.clustering.base import BaseClusterer from aeon.clustering.dummy import DummyClusterer diff --git a/aeon/clustering/feature_based/__init__.py b/aeon/clustering/feature_based/__init__.py index 6eeb19ebcf..04951e75d9 100644 --- a/aeon/clustering/feature_based/__init__.py +++ b/aeon/clustering/feature_based/__init__.py @@ -8,8 +8,10 @@ "Catch22Clusterer", "SummaryClusterer", "TSFreshClusterer", + "RCluster", ] from aeon.clustering.feature_based._catch22 import Catch22Clusterer from aeon.clustering.feature_based._summary import SummaryClusterer from aeon.clustering.feature_based._tsfresh import TSFreshClusterer +from aeon.clustering.feature_based._r_cluster import RCluster diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py new file mode 100644 index 0000000000..646af4efa4 --- /dev/null +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -0,0 +1,227 @@ +import multiprocessing + +import numpy as np +from numba import get_num_threads, set_num_threads +from sklearn.cluster import KMeans +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler +from aeon.base._base import _clone_estimator +from aeon.clustering.base import BaseClusterer +from sklearn.utils import check_random_state +from aeon.transformations.collection.convolution_based._minirocket import ( + _fit_biases, + _fit_dilations, + _quantiles, + _static_transform_uni, +) + + +class RCluster(BaseClusterer): + """Time series R Clustering implementation . + + Adapted from the implementation used in [1]_ + + Parameters + ---------- + num_kernels : int , default = 84 + The number of convolutional kernels used to transform the input time series + These kernels are fixed and pre-defined (not random) and are + optimized for computational speed and + feature diversity + max_dilations_per_kernel : int , default = 32 + The maximum number of dilation rates applied to each kernel + Dilations control the spacing of the kernel's receptive field + over the time series,capturing patterns at varying scales + num_cluster : int , default = 8 + The number of clusters used + estimator : sklearn clusterer, default=None + An sklearn estimator to be built using the transformed data. + random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` + means using all + processors. + Notes + ----- + Adapted from the implementation from source code + https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + References + ---------- + .. [1] Time series clustering with random convolutional kernels + https://link.springer.com/article/10.1007/s10618-024-01018-x + """ + _tags = { + "capability:multivariate": False, + "capability:multithreading": True, + "capability:unequal_length": False, + "capability:missing_values": False, + } + + def __init__( + self, + n_kernels=84, + max_dilations_per_kernel=32, + n_clusters=8, + estimator=None, + random_state=None, + n_jobs=1, + ): + self.n_jobs = n_jobs + self.n_kernels = n_kernels + self.max_dilations_per_kernel = max_dilations_per_kernel + self.n_clusters = n_clusters + self.estimator = estimator + self._estimator = None + self.random_state = random_state + self.indices = np.array(( + 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, + 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, + 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, + 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, + 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, + 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, + 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, + 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, + 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, + 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, + 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, + 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 + ), dtype = np.int32).reshape(84, 3) + self.is_fitted = False + super().__init__() + + def _get_parameterised_data(self, X): + + self._random_state = check_random_state(self.random_state) + _, n_channels, n_timepoints = X.shape + X = X.astype(np.float32) + + dilations, num_features_per_dilation = _fit_dilations( + n_timepoints, self.n_kernels, self.max_dilations_per_kernel + ) + + num_features_per_kernel = np.sum(num_features_per_dilation) + + quantiles = _quantiles(self.n_kernels * num_features_per_kernel) + + quantiles = self._random_state.permutation(quantiles) + + n_dilations = len(dilations) + n_combinations = self.n_kernels * n_dilations + max_n_channels = min(n_channels, 9) + max_exponent = np.log2(max_n_channels + 1) + n_channels_per_combination = ( + 2 ** self._random_state.uniform(0, max_exponent, n_combinations) + ).astype(np.int32) + channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) + n_channels_start = 0 + for combination_index in range(n_combinations): + n_channels_this_combination = n_channels_per_combination[combination_index] + n_channels_end = n_channels_start + n_channels_this_combination + channel_indices[n_channels_start:n_channels_end] = np.random.choice( + n_channels, n_channels_this_combination, replace=False + ) + n_channels_start = n_channels_end + biases = _fit_biases( + X, + n_channels_per_combination, + channel_indices, + dilations, + num_features_per_dilation, + quantiles, + self.indices, + self.random_state, + ) + + return ( + np.array([_], dtype=np.int32), + np.array([_], dtype=np.int32), + dilations, + num_features_per_dilation, + biases, + ) + + def check_params(self, X): + X = X.astype(np.float32) + if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): + n_jobs = multiprocessing.cpu_count() + else: + n_jobs = self.n_jobs + set_num_threads(n_jobs) + return X + + def _get_transformed_data(self, X, parameters): + prev_threads = get_num_threads() + X = self.check_params(X) + X = X.squeeze(1) + X_ = _static_transform_uni(X, parameters, self.indices) + set_num_threads(prev_threads) + return X_ + + def _fit(self, X, y=None): + parameters = self._get_parameterised_data(X) + + transformed_data = self._get_transformed_data(X=X, parameters=parameters) + + self.scaler = StandardScaler() + X_std = self.scaler.fit_transform(transformed_data) + + pca = PCA().fit(X_std) + optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) + + optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) + + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + transformed_data_pca = self.pca.fit_transform(X_std) + self._estimator = _clone_estimator( + (KMeans() if self.estimator is None else self.estimator), + self.random_state, + ) + self._estimator.fit(transformed_data_pca) + self.labels_ = self._estimator.labels_ + self.is_fitted = True + + def _predict(self, X, y=None) -> np.ndarray: + if not self.is_fitted: + raise ValueError( + "Data is not fitted. Please fit the model before using it." + ) + + parameters = self._get_parameterised_data(X) + + transformed_data = self._get_transformed_data(X=X, parameters=parameters) + + X_std = self.scaler.fit_transform(transformed_data) + transformed_data_pca = self.pca.fit_transform(X_std) + + return self._estimator.predict(transformed_data_pca) + + def _fit_predict(self, X, y=None) -> np.ndarray: + self._fit(X, y) + return self._predict(X, y) + + @classmethod + def _get_test_params(cls, parameter_set="default") -> dict: + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + return { + "n_clusters": 2, + "random_state": 1, + } \ No newline at end of file diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py new file mode 100644 index 0000000000..bc1bc97f8e --- /dev/null +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -0,0 +1,20 @@ +"""Test For RCluster.""" + +from aeon.datasets import load_gunpoint +import numpy as np +from aeon.clustering.feature_based._r_cluster import RCluster + + +def test_r_cluster(): + """Test implementation of RCluster.""" + X_train, y_train = load_gunpoint(split="train") + + num_points = 20 + + X_train = X_train[:num_points] + + rcluster = RCluster(random_state=1, n_clusters=2) + rcluster.fit(X_train) + train_result = rcluster.predict(X_train) + labs = rcluster.labels_ + assert np.array_equal(labs, train_result) From 7c63f205c2f168cf043fd56b2426e00e1f66363d Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 13 Dec 2024 22:22:51 +0000 Subject: [PATCH 061/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/__init__.py | 2 +- aeon/clustering/feature_based/_r_cluster.py | 279 +++++++++++++++++- .../feature_based/tests/__init__.py | 0 .../feature_based/tests/test_r_cluster.py | 3 +- 4 files changed, 266 insertions(+), 18 deletions(-) create mode 100644 aeon/clustering/feature_based/tests/__init__.py diff --git a/aeon/clustering/feature_based/__init__.py b/aeon/clustering/feature_based/__init__.py index 04951e75d9..0467681248 100644 --- a/aeon/clustering/feature_based/__init__.py +++ b/aeon/clustering/feature_based/__init__.py @@ -12,6 +12,6 @@ ] from aeon.clustering.feature_based._catch22 import Catch22Clusterer +from aeon.clustering.feature_based._r_cluster import RCluster from aeon.clustering.feature_based._summary import SummaryClusterer from aeon.clustering.feature_based._tsfresh import TSFreshClusterer -from aeon.clustering.feature_based._r_cluster import RCluster diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 646af4efa4..3553fe6c64 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -5,9 +5,10 @@ from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_random_state + from aeon.base._base import _clone_estimator from aeon.clustering.base import BaseClusterer -from sklearn.utils import check_random_state from aeon.transformations.collection.convolution_based._minirocket import ( _fit_biases, _fit_dilations, @@ -45,15 +46,18 @@ class RCluster(BaseClusterer): The number of jobs to run in parallel for `transform`. ``-1`` means using all processors. + Notes ----- Adapted from the implementation from source code https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb + References ---------- .. [1] Time series clustering with random convolutional kernels https://link.springer.com/article/10.1007/s10618-024-01018-x """ + _tags = { "capability:multivariate": False, "capability:multithreading": True, @@ -77,20 +81,263 @@ def __init__( self.estimator = estimator self._estimator = None self.random_state = random_state - self.indices = np.array(( - 1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3, 1, 4, 5, 0, 1, 3, 3, 5, 6, 0, - 1, 2, 2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7, 0, 1, 4, 3, 4, 6, 0, 4, - 5, 2, 6, 7, 5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8, 1, 6, 8, 0, 2, 6, - 5, 6, 8, 2, 5, 7, 0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7, 2, 3, 8, 2, - 3, 4, 1, 4, 6, 3, 4, 5, 0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8, 6, 7, - 8, 4, 6, 8, 0, 3, 4, 1, 3, 4, 1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7, - 1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8, 4, 5, 6, 0, 2, 5, 3, 5, 7, 0, - 2, 4, 2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8, 0, 2, 7, 3, 6, 8, 2, 3, - 6, 3, 7, 8, 0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8, 3, 6, 7, 3, 4, 7, - 0, 4, 7, 3, 5, 8, 2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6, 0, 5, 6, 3, - 4, 8, 0, 6, 8, 2, 4, 7, 0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6, 1, 2, - 4, 0, 5, 7, 1, 3, 8, 1, 7, 8 - ), dtype = np.int32).reshape(84, 3) + self.indices = np.array( + ( + 1, + 3, + 6, + 1, + 2, + 7, + 1, + 2, + 3, + 0, + 2, + 3, + 1, + 4, + 5, + 0, + 1, + 3, + 3, + 5, + 6, + 0, + 1, + 2, + 2, + 5, + 8, + 1, + 3, + 7, + 0, + 1, + 8, + 4, + 6, + 7, + 0, + 1, + 4, + 3, + 4, + 6, + 0, + 4, + 5, + 2, + 6, + 7, + 5, + 6, + 7, + 0, + 1, + 6, + 4, + 5, + 7, + 4, + 7, + 8, + 1, + 6, + 8, + 0, + 2, + 6, + 5, + 6, + 8, + 2, + 5, + 7, + 0, + 1, + 7, + 0, + 7, + 8, + 0, + 3, + 5, + 0, + 3, + 7, + 2, + 3, + 8, + 2, + 3, + 4, + 1, + 4, + 6, + 3, + 4, + 5, + 0, + 3, + 8, + 4, + 5, + 8, + 0, + 4, + 6, + 1, + 4, + 8, + 6, + 7, + 8, + 4, + 6, + 8, + 0, + 3, + 4, + 1, + 3, + 4, + 1, + 5, + 7, + 1, + 4, + 7, + 1, + 2, + 8, + 0, + 6, + 7, + 1, + 6, + 7, + 1, + 3, + 5, + 0, + 1, + 5, + 0, + 4, + 8, + 4, + 5, + 6, + 0, + 2, + 5, + 3, + 5, + 7, + 0, + 2, + 4, + 2, + 6, + 8, + 2, + 3, + 7, + 2, + 5, + 6, + 2, + 4, + 8, + 0, + 2, + 7, + 3, + 6, + 8, + 2, + 3, + 6, + 3, + 7, + 8, + 0, + 5, + 8, + 1, + 2, + 6, + 2, + 3, + 5, + 1, + 5, + 8, + 3, + 6, + 7, + 3, + 4, + 7, + 0, + 4, + 7, + 3, + 5, + 8, + 2, + 4, + 5, + 1, + 2, + 5, + 2, + 7, + 8, + 2, + 4, + 6, + 0, + 5, + 6, + 3, + 4, + 8, + 0, + 6, + 8, + 2, + 4, + 7, + 0, + 2, + 8, + 0, + 3, + 6, + 5, + 7, + 8, + 1, + 5, + 6, + 1, + 2, + 4, + 0, + 5, + 7, + 1, + 3, + 8, + 1, + 7, + 8, + ), + dtype=np.int32, + ).reshape(84, 3) self.is_fitted = False super().__init__() @@ -224,4 +471,4 @@ def _get_test_params(cls, parameter_set="default") -> dict: return { "n_clusters": 2, "random_state": 1, - } \ No newline at end of file + } diff --git a/aeon/clustering/feature_based/tests/__init__.py b/aeon/clustering/feature_based/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py index bc1bc97f8e..4fd806bdba 100644 --- a/aeon/clustering/feature_based/tests/test_r_cluster.py +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -1,8 +1,9 @@ """Test For RCluster.""" -from aeon.datasets import load_gunpoint import numpy as np + from aeon.clustering.feature_based._r_cluster import RCluster +from aeon.datasets import load_gunpoint def test_r_cluster(): From 0a8e1baf2eb45abdcac0af0c2dc390e59278ff56 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 14 Dec 2024 03:54:00 +0530 Subject: [PATCH 062/103] deleted the old code --- aeon/clustering/_r_cluster.py | 469 ---------------------------------- 1 file changed, 469 deletions(-) delete mode 100644 aeon/clustering/_r_cluster.py diff --git a/aeon/clustering/_r_cluster.py b/aeon/clustering/_r_cluster.py deleted file mode 100644 index 777d605166..0000000000 --- a/aeon/clustering/_r_cluster.py +++ /dev/null @@ -1,469 +0,0 @@ -import multiprocessing - -import numpy as np -from numba import get_num_threads, set_num_threads -from sklearn.cluster import KMeans -from sklearn.decomposition import PCA -from sklearn.preprocessing import StandardScaler - -from aeon.clustering.base import BaseClusterer -from aeon.transformations.collection.convolution_based._minirocket import ( - _fit_biases, - _fit_dilations, - _quantiles, - _static_transform_uni, -) - - -class RCluster(BaseClusterer): - """Time series R Clustering implementation . - - Adapted from the implementation used in [1]_ - - Parameters - ---------- - num_kernels : int , default = 84 - The number of convolutional kernels used to transform the input time series - These kernels are fixed and pre-defined (not random) and are - optimized for computational speed and - feature diversity - - max_dilations_per_kernel : int , default = 32 - The maximum number of dilation rates applied to each kernel - Dilations control the spacing of the kernel's receptive field - over the time series,capturing patterns at varying scales - - num_features : int , default = 500 - The number of features extracted per kernel after applying - the transformation - - num_cluster : int , default = 8 - The number of clusters used - - n_init : int , default = 10 - The number of times the clustering algorithm (e.g., KMeans) will - run with different centroid seeds - to avoid poor local optima - - max_iter: int, default=300 - Maximum number of iterations of the k-means algorithm for a single - run. - random_state: int or np.random.RandomState instance or None, default=42 - Determines random number generation for centroid initialization. - n_jobs : int, default=1 - The number of jobs to run in parallel for `transform`. ``-1`` - means using all - processors. - - Notes - ----- - Adapted from the implementation from source code - https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb - - References - ---------- - .. [1] Time series clustering with random convolutional kernels - https://link.springer.com/article/10.1007/s10618-024-01018-x - """ - - _tags = { - "capability:multivariate": False, - "capability:multithreading": True, - "capability:unequal_length": False, - "capability:missing_values": False, - } - - def __init__( - self, - n_kernels=84, - max_dilations_per_kernel=32, - n_clusters=8, - n_init=10, - random_state=42, - max_iter=300, - n_jobs=1, - ): - self.n_jobs = n_jobs - self.n_kernels = n_kernels - self.max_dilations_per_kernel = max_dilations_per_kernel - self.n_clusters = n_clusters - self.n_init = n_init - self.random_state = random_state - self.max_iter = max_iter - self.indices = np.array( - ( - 1, - 3, - 6, - 1, - 2, - 7, - 1, - 2, - 3, - 0, - 2, - 3, - 1, - 4, - 5, - 0, - 1, - 3, - 3, - 5, - 6, - 0, - 1, - 2, - 2, - 5, - 8, - 1, - 3, - 7, - 0, - 1, - 8, - 4, - 6, - 7, - 0, - 1, - 4, - 3, - 4, - 6, - 0, - 4, - 5, - 2, - 6, - 7, - 5, - 6, - 7, - 0, - 1, - 6, - 4, - 5, - 7, - 4, - 7, - 8, - 1, - 6, - 8, - 0, - 2, - 6, - 5, - 6, - 8, - 2, - 5, - 7, - 0, - 1, - 7, - 0, - 7, - 8, - 0, - 3, - 5, - 0, - 3, - 7, - 2, - 3, - 8, - 2, - 3, - 4, - 1, - 4, - 6, - 3, - 4, - 5, - 0, - 3, - 8, - 4, - 5, - 8, - 0, - 4, - 6, - 1, - 4, - 8, - 6, - 7, - 8, - 4, - 6, - 8, - 0, - 3, - 4, - 1, - 3, - 4, - 1, - 5, - 7, - 1, - 4, - 7, - 1, - 2, - 8, - 0, - 6, - 7, - 1, - 6, - 7, - 1, - 3, - 5, - 0, - 1, - 5, - 0, - 4, - 8, - 4, - 5, - 6, - 0, - 2, - 5, - 3, - 5, - 7, - 0, - 2, - 4, - 2, - 6, - 8, - 2, - 3, - 7, - 2, - 5, - 6, - 2, - 4, - 8, - 0, - 2, - 7, - 3, - 6, - 8, - 2, - 3, - 6, - 3, - 7, - 8, - 0, - 5, - 8, - 1, - 2, - 6, - 2, - 3, - 5, - 1, - 5, - 8, - 3, - 6, - 7, - 3, - 4, - 7, - 0, - 4, - 7, - 3, - 5, - 8, - 2, - 4, - 5, - 1, - 2, - 5, - 2, - 7, - 8, - 2, - 4, - 6, - 0, - 5, - 6, - 3, - 4, - 8, - 0, - 6, - 8, - 2, - 4, - 7, - 0, - 2, - 8, - 0, - 3, - 6, - 5, - 7, - 8, - 1, - 5, - 6, - 1, - 2, - 4, - 0, - 5, - 7, - 1, - 3, - 8, - 1, - 7, - 8, - ), - dtype=np.int32, - ).reshape(84, 3) - self.is_fitted = False - self._r_cluster = KMeans( - n_clusters=self.n_clusters, - n_init=self.n_init, - random_state=self.random_state, - max_iter=self.max_iter, - ) - super().__init__() - - def _get_parameterised_data(self, X): - np.random.seed(self.random_state) - _, n_channels, n_timepoints = X.shape - X = X.astype(np.float32) - - dilations, num_features_per_dilation = _fit_dilations( - n_timepoints, self.n_kernels, self.max_dilations_per_kernel - ) - - num_features_per_kernel = np.sum(num_features_per_dilation) - - quantiles = _quantiles(self.n_kernels * num_features_per_kernel) - - # MODIFICATION - quantiles = np.random.permutation(quantiles) - - n_dilations = len(dilations) - n_combinations = self.n_kernels * n_dilations - max_n_channels = min(n_channels, 9) - max_exponent = np.log2(max_n_channels + 1) - n_channels_per_combination = ( - 2 ** np.random.uniform(0, max_exponent, n_combinations) - ).astype(np.int32) - channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) - n_channels_start = 0 - for combination_index in range(n_combinations): - n_channels_this_combination = n_channels_per_combination[combination_index] - n_channels_end = n_channels_start + n_channels_this_combination - channel_indices[n_channels_start:n_channels_end] = np.random.choice( - n_channels, n_channels_this_combination, replace=False - ) - n_channels_start = n_channels_end - - biases = _fit_biases( - X, - n_channels_per_combination, - channel_indices, - dilations, - num_features_per_dilation, - quantiles, - self.indices, - self.random_state, - ) - - return ( - np.array([_], dtype=np.int32), - np.array([_], dtype=np.int32), - dilations, - num_features_per_dilation, - biases, - ) - - def _get_transformed_data(self, X, parameters): - X = X.astype(np.float32) - _, n_channels, n_timepoints = X.shape - prev_threads = get_num_threads() - if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): - n_jobs = multiprocessing.cpu_count() - else: - n_jobs = self.n_jobs - set_num_threads(n_jobs) - if n_channels == 1: - X = X.squeeze(1) - X_ = _static_transform_uni(X, parameters, self.indices) - else: - raise ValueError( - "RCluster is not compatible with multivariate data." - "Please ensure the input has only one channel." - ) - - set_num_threads(prev_threads) - return X_ - - def _fit(self, X, y=None): - parameters = self._get_parameterised_data(X) - - transformed_data = self._get_transformed_data(X=X, parameters=parameters) - - self.scaler = StandardScaler() - X_std = self.scaler.fit_transform(transformed_data) - - pca = PCA().fit(X_std) - optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - - optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) - - self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) - transformed_data_pca = self.pca.fit_transform(X_std) - - self._r_cluster.fit(transformed_data_pca) - self.labels_ = self._r_cluster.labels_ - self.is_fitted = True - - def _predict(self, X, y=None) -> np.ndarray: - if not self.is_fitted: - raise ValueError( - "Data is not fitted. Please fit the model before using it." - ) - - parameters = self._get_parameterised_data(X) - - transformed_data = self._get_transformed_data(X=X, parameters=parameters) - - X_std = self.scaler.fit_transform(transformed_data) - transformed_data_pca = self.pca.fit_transform(X_std) - - return self._r_cluster.predict(transformed_data_pca) - - def _fit_predict(self, X, y=None) -> np.ndarray: - self._fit(X, y) - return self._predict(X, y) From 4e3e09b77ce751fc05bd7df62a043534852c8626 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Sat, 14 Dec 2024 03:56:53 +0530 Subject: [PATCH 063/103] add init file in featuer_based/tests --- aeon/clustering/feature_based/tests/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/feature_based/tests/__init__.py b/aeon/clustering/feature_based/tests/__init__.py index e69de29bb2..3baec9f87b 100644 --- a/aeon/clustering/feature_based/tests/__init__.py +++ b/aeon/clustering/feature_based/tests/__init__.py @@ -0,0 +1 @@ +"""Feature Based learning clustering tests.""" \ No newline at end of file From 42de76432a5be94e313e8ab4fe62223fc965da9b Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 13 Dec 2024 22:27:30 +0000 Subject: [PATCH 064/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/tests/__init__.py b/aeon/clustering/feature_based/tests/__init__.py index 3baec9f87b..d6e8de2247 100644 --- a/aeon/clustering/feature_based/tests/__init__.py +++ b/aeon/clustering/feature_based/tests/__init__.py @@ -1 +1 @@ -"""Feature Based learning clustering tests.""" \ No newline at end of file +"""Feature Based learning clustering tests.""" From 061af60712a25c29943da2f1923aee26ec2df094 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 14 Dec 2024 04:11:59 +0530 Subject: [PATCH 065/103] Update _r_cluster.py to fix test issues --- aeon/clustering/feature_based/_r_cluster.py | 22 --------------------- 1 file changed, 22 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 3553fe6c64..b29ca02463 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -450,25 +450,3 @@ def _predict(self, X, y=None) -> np.ndarray: def _fit_predict(self, X, y=None) -> np.ndarray: self._fit(X, y) return self._predict(X, y) - - @classmethod - def _get_test_params(cls, parameter_set="default") -> dict: - """Return testing parameter settings for the estimator. - - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - Returns - ------- - params : dict or list of dict, default={} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - """ - return { - "n_clusters": 2, - "random_state": 1, - } From 55b3b3e4039bc2fd9e320ca6fcce6249e2de43db Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:15:00 +0530 Subject: [PATCH 066/103] Update _r_cluster.py to convert input to float32 and update random_state --- aeon/clustering/feature_based/_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index b29ca02463..47597ac4c3 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -71,7 +71,7 @@ def __init__( max_dilations_per_kernel=32, n_clusters=8, estimator=None, - random_state=None, + random_state=42, n_jobs=1, ): self.n_jobs = n_jobs @@ -342,7 +342,7 @@ def __init__( super().__init__() def _get_parameterised_data(self, X): - + X = X.astype(np.float32) self._random_state = check_random_state(self.random_state) _, n_channels, n_timepoints = X.shape X = X.astype(np.float32) From 7994dea76c558e8714c9f0710c49f70b5b994c87 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:37:36 +0530 Subject: [PATCH 067/103] Update _r_cluster.py_random_state --- aeon/clustering/feature_based/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 47597ac4c3..5cbba4da7e 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -71,7 +71,7 @@ def __init__( max_dilations_per_kernel=32, n_clusters=8, estimator=None, - random_state=42, + random_state=None, n_jobs=1, ): self.n_jobs = n_jobs From 04f78e06726854ebbe6c5926e62aa250f6f8d009 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 14 Dec 2024 15:44:24 +0530 Subject: [PATCH 068/103] Update _r_cluster.py to add _get_test_params --- aeon/clustering/feature_based/_r_cluster.py | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 5cbba4da7e..755621da9c 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -450,3 +450,27 @@ def _predict(self, X, y=None) -> np.ndarray: def _fit_predict(self, X, y=None) -> np.ndarray: self._fit(X, y) return self._predict(X, y) + + @classmethod + def _get_test_params(cls, parameter_set="default") -> dict: + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + return { + "n_kernels": 84, + "max_dilations_per_kernel": 32, + "n_clusters": 8, + "random_state": 1, + } From 81217384ccda22244230ae0abbf5c061f6899e0d Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 14 Dec 2024 10:15:06 +0000 Subject: [PATCH 069/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 755621da9c..ab0035afa6 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -450,7 +450,7 @@ def _predict(self, X, y=None) -> np.ndarray: def _fit_predict(self, X, y=None) -> np.ndarray: self._fit(X, y) return self._predict(X, y) - + @classmethod def _get_test_params(cls, parameter_set="default") -> dict: """Return testing parameter settings for the estimator. From 6c5d6a102dc61c993498a6914ba556e70ddd4238 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 15 Dec 2024 15:13:23 +0530 Subject: [PATCH 070/103] Update _r_cluster.py to update random_state for kmean and pca --- aeon/clustering/feature_based/_r_cluster.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index ab0035afa6..051183b07d 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -410,6 +410,7 @@ def _get_transformed_data(self, X, parameters): return X_ def _fit(self, X, y=None): + _random_state = check_random_state(self.random_state) parameters = self._get_parameterised_data(X) transformed_data = self._get_transformed_data(X=X, parameters=parameters) @@ -422,11 +423,11 @@ def _fit(self, X, y=None): optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) - self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + self.pca = PCA(n_components=optimal_dimensions, random_state=_random_state) transformed_data_pca = self.pca.fit_transform(X_std) self._estimator = _clone_estimator( (KMeans() if self.estimator is None else self.estimator), - self.random_state, + _random_state, ) self._estimator.fit(transformed_data_pca) self.labels_ = self._estimator.labels_ From 9e8fa2037803ebd8d56e687c5161698566cde3bd Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:39:37 +0400 Subject: [PATCH 071/103] Update _r_cluster.py to check if random_state causes test issues --- aeon/clustering/feature_based/_r_cluster.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 051183b07d..aba1e9f836 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -342,10 +342,10 @@ def __init__( super().__init__() def _get_parameterised_data(self, X): + np.random.seed(self.random_state) X = X.astype(np.float32) - self._random_state = check_random_state(self.random_state) + _, n_channels, n_timepoints = X.shape - X = X.astype(np.float32) dilations, num_features_per_dilation = _fit_dilations( n_timepoints, self.n_kernels, self.max_dilations_per_kernel @@ -355,14 +355,14 @@ def _get_parameterised_data(self, X): quantiles = _quantiles(self.n_kernels * num_features_per_kernel) - quantiles = self._random_state.permutation(quantiles) + quantiles = np.random.permutation(quantiles) n_dilations = len(dilations) n_combinations = self.n_kernels * n_dilations max_n_channels = min(n_channels, 9) max_exponent = np.log2(max_n_channels + 1) n_channels_per_combination = ( - 2 ** self._random_state.uniform(0, max_exponent, n_combinations) + 2 ** np.random.uniform(0, max_exponent, n_combinations) ).astype(np.int32) channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) n_channels_start = 0 @@ -391,7 +391,7 @@ def _get_parameterised_data(self, X): num_features_per_dilation, biases, ) - + def check_params(self, X): X = X.astype(np.float32) if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): From bdfe1f91cda2f8b63e61282a2b436981dae4c219 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 17 Dec 2024 10:40:10 +0000 Subject: [PATCH 072/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index aba1e9f836..fb1b200842 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -391,7 +391,7 @@ def _get_parameterised_data(self, X): num_features_per_dilation, biases, ) - + def check_params(self, X): X = X.astype(np.float32) if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): From 106bb752c8dd807f865eb4632a8aeda533b85794 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:01:20 +0400 Subject: [PATCH 073/103] Update _r_cluster.py to use only kmeans --- aeon/clustering/feature_based/_r_cluster.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index fb1b200842..4a53f37571 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -70,7 +70,6 @@ def __init__( n_kernels=84, max_dilations_per_kernel=32, n_clusters=8, - estimator=None, random_state=None, n_jobs=1, ): @@ -78,8 +77,6 @@ def __init__( self.n_kernels = n_kernels self.max_dilations_per_kernel = max_dilations_per_kernel self.n_clusters = n_clusters - self.estimator = estimator - self._estimator = None self.random_state = random_state self.indices = np.array( ( @@ -339,6 +336,7 @@ def __init__( dtype=np.int32, ).reshape(84, 3) self.is_fitted = False + self.estimator = KMeans(n_clusters=self.n_clusters,random_state=self.random_state) super().__init__() def _get_parameterised_data(self, X): @@ -362,7 +360,7 @@ def _get_parameterised_data(self, X): max_n_channels = min(n_channels, 9) max_exponent = np.log2(max_n_channels + 1) n_channels_per_combination = ( - 2 ** np.random.uniform(0, max_exponent, n_combinations) + 2 ** np.random.uniform(0, max_exponent, n_combinations) ).astype(np.int32) channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) n_channels_start = 0 @@ -425,12 +423,8 @@ def _fit(self, X, y=None): self.pca = PCA(n_components=optimal_dimensions, random_state=_random_state) transformed_data_pca = self.pca.fit_transform(X_std) - self._estimator = _clone_estimator( - (KMeans() if self.estimator is None else self.estimator), - _random_state, - ) - self._estimator.fit(transformed_data_pca) - self.labels_ = self._estimator.labels_ + self.estimator.fit(transformed_data_pca) + self.labels_ = self.estimator.labels_ self.is_fitted = True def _predict(self, X, y=None) -> np.ndarray: @@ -446,7 +440,7 @@ def _predict(self, X, y=None) -> np.ndarray: X_std = self.scaler.fit_transform(transformed_data) transformed_data_pca = self.pca.fit_transform(X_std) - return self._estimator.predict(transformed_data_pca) + return self.estimator.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: self._fit(X, y) From 92e8c2718e75282d7afd03d494469cdb865b6305 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 17 Dec 2024 11:02:03 +0000 Subject: [PATCH 074/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 4a53f37571..88fda0acd9 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -336,7 +336,9 @@ def __init__( dtype=np.int32, ).reshape(84, 3) self.is_fitted = False - self.estimator = KMeans(n_clusters=self.n_clusters,random_state=self.random_state) + self.estimator = KMeans( + n_clusters=self.n_clusters, random_state=self.random_state + ) super().__init__() def _get_parameterised_data(self, X): @@ -360,7 +362,7 @@ def _get_parameterised_data(self, X): max_n_channels = min(n_channels, 9) max_exponent = np.log2(max_n_channels + 1) n_channels_per_combination = ( - 2 ** np.random.uniform(0, max_exponent, n_combinations) + 2 ** np.random.uniform(0, max_exponent, n_combinations) ).astype(np.int32) channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32) n_channels_start = 0 From b597c8c97e4012723e13943d76b907d45e3a6b0a Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:04:16 +0400 Subject: [PATCH 075/103] Update _r_cluster.py to remove aeon._clone_estimator --- aeon/clustering/feature_based/_r_cluster.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 88fda0acd9..bb016a8cf2 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -6,8 +6,6 @@ from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from sklearn.utils import check_random_state - -from aeon.base._base import _clone_estimator from aeon.clustering.base import BaseClusterer from aeon.transformations.collection.convolution_based._minirocket import ( _fit_biases, From 0b822f36459bde37737e1ede48107ac5da0f8a2b Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 17 Dec 2024 11:05:01 +0000 Subject: [PATCH 076/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index bb016a8cf2..3b9207a47f 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -6,6 +6,7 @@ from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from sklearn.utils import check_random_state + from aeon.clustering.base import BaseClusterer from aeon.transformations.collection.convolution_based._minirocket import ( _fit_biases, From f0e462462ff3f74283c8926e0899975b60da02eb Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:17:19 +0400 Subject: [PATCH 077/103] Update _r_cluster.py to update docs --- aeon/clustering/feature_based/_r_cluster.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 3b9207a47f..ed60ff4009 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -34,13 +34,7 @@ class RCluster(BaseClusterer): over the time series,capturing patterns at varying scales num_cluster : int , default = 8 The number of clusters used - estimator : sklearn clusterer, default=None - An sklearn estimator to be built using the transformed data. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. + random_state : int, Random state or None, default=None n_jobs : int, default=1 The number of jobs to run in parallel for `transform`. ``-1`` means using all From 99e9d97eed39edef38013c2acdec83202f40f0f0 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:08:35 +0400 Subject: [PATCH 078/103] Update _r_cluster.py to add extra parameters and update fit_predict --- aeon/clustering/feature_based/_r_cluster.py | 23 ++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index ed60ff4009..2007a0753f 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -65,7 +65,11 @@ def __init__( n_clusters=8, random_state=None, n_jobs=1, + n_init = 10, + num_features=500, ): + self.num_features = num_features + self.n_init=n_init self.n_jobs = n_jobs self.n_kernels = n_kernels self.max_dilations_per_kernel = max_dilations_per_kernel @@ -330,7 +334,8 @@ def __init__( ).reshape(84, 3) self.is_fitted = False self.estimator = KMeans( - n_clusters=self.n_clusters, random_state=self.random_state + n_clusters=self.n_clusters, random_state=self.random_state, + n_init= self.n_init ) super().__init__() @@ -341,7 +346,7 @@ def _get_parameterised_data(self, X): _, n_channels, n_timepoints = X.shape dilations, num_features_per_dilation = _fit_dilations( - n_timepoints, self.n_kernels, self.max_dilations_per_kernel + n_timepoints, self.num_features, self.max_dilations_per_kernel ) num_features_per_kernel = np.sum(num_features_per_dilation) @@ -403,7 +408,6 @@ def _get_transformed_data(self, X, parameters): return X_ def _fit(self, X, y=None): - _random_state = check_random_state(self.random_state) parameters = self._get_parameterised_data(X) transformed_data = self._get_transformed_data(X=X, parameters=parameters) @@ -438,8 +442,17 @@ def _predict(self, X, y=None) -> np.ndarray: return self.estimator.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: - self._fit(X, y) - return self._predict(X, y) + parameters = self._get_parameterised_data(X) + transformed_data = self._get_transformed_data(X=X, parameters=parameters) + self.scaler = StandardScaler() + X_std = self.scaler.fit_transform(transformed_data) + + pca = PCA().fit(X_std) + optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) + + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + transformed_data_pca = self.pca.fit_transform(X_std) + return self.estimator.fit_predict(transformed_data_pca) @classmethod def _get_test_params(cls, parameter_set="default") -> dict: From 20780b43b176c62bf2d27a7d1f84f5ab48452ff3 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Wed, 18 Dec 2024 17:09:08 +0000 Subject: [PATCH 079/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 2007a0753f..752927d3e5 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -65,11 +65,11 @@ def __init__( n_clusters=8, random_state=None, n_jobs=1, - n_init = 10, + n_init=10, num_features=500, ): self.num_features = num_features - self.n_init=n_init + self.n_init = n_init self.n_jobs = n_jobs self.n_kernels = n_kernels self.max_dilations_per_kernel = max_dilations_per_kernel @@ -334,8 +334,9 @@ def __init__( ).reshape(84, 3) self.is_fitted = False self.estimator = KMeans( - n_clusters=self.n_clusters, random_state=self.random_state, - n_init= self.n_init + n_clusters=self.n_clusters, + random_state=self.random_state, + n_init=self.n_init, ) super().__init__() From 6abee03e2207e9d8bfb52ed0b52f51819d05d347 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:11:57 +0400 Subject: [PATCH 080/103] Update _r_cluster.py to random_state --- aeon/clustering/feature_based/_r_cluster.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 752927d3e5..5ed9748542 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -5,7 +5,6 @@ from sklearn.cluster import KMeans from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler -from sklearn.utils import check_random_state from aeon.clustering.base import BaseClusterer from aeon.transformations.collection.convolution_based._minirocket import ( @@ -421,7 +420,7 @@ def _fit(self, X, y=None): optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) - self.pca = PCA(n_components=optimal_dimensions, random_state=_random_state) + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self.estimator.fit(transformed_data_pca) self.labels_ = self.estimator.labels_ From 75588934d7ca0db58a68692681e44914f3149637 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:20:26 +0400 Subject: [PATCH 081/103] Update _r_cluster.py to add description about new added parameters --- aeon/clustering/feature_based/_r_cluster.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 5ed9748542..c58a045e82 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -38,6 +38,12 @@ class RCluster(BaseClusterer): The number of jobs to run in parallel for `transform`. ``-1`` means using all processors. + num_features : int, default=500 + Number of features need for fit_dilations method. + n_init : int, default=10 + Number of times the R-Cluster algorithm will be run with different + centroid seeds. The final result will be the best output of n_init + consecutive runs in terms of inertia. Notes ----- From 5f0a770b1e9f0adb84e25b51c00abbbe0c273573 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:58:11 +0400 Subject: [PATCH 082/103] Updated _r_cluster.py as requested by moderators --- aeon/clustering/feature_based/_r_cluster.py | 35 +++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index c58a045e82..df274b1b1a 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -15,8 +15,8 @@ ) -class RCluster(BaseClusterer): - """Time series R Clustering implementation . +class RClusterer(BaseClusterer): + """Implementation of Time Series R Cluster Adapted from the implementation used in [1]_ @@ -68,10 +68,10 @@ def __init__( n_kernels=84, max_dilations_per_kernel=32, n_clusters=8, - random_state=None, - n_jobs=1, n_init=10, num_features=500, + random_state=None, + n_jobs=1, ): self.num_features = num_features self.n_init = n_init @@ -337,16 +337,10 @@ def __init__( ), dtype=np.int32, ).reshape(84, 3) - self.is_fitted = False - self.estimator = KMeans( - n_clusters=self.n_clusters, - random_state=self.random_state, - n_init=self.n_init, - ) super().__init__() def _get_parameterised_data(self, X): - np.random.seed(self.random_state) + random_state = np.random.RandomState(self.random_state) X = X.astype(np.float32) _, n_channels, n_timepoints = X.shape @@ -359,7 +353,7 @@ def _get_parameterised_data(self, X): quantiles = _quantiles(self.n_kernels * num_features_per_kernel) - quantiles = np.random.permutation(quantiles) + quantiles = random_state.permutation(quantiles) n_dilations = len(dilations) n_combinations = self.n_kernels * n_dilations @@ -424,14 +418,16 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[1])) + optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[2])) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) + self.estimator = KMeans( + n_clusters=self.n_clusters, + random_state=self.random_state, + n_init=self.n_init, + ) self.estimator.fit(transformed_data_pca) - self.labels_ = self.estimator.labels_ - self.is_fitted = True - def _predict(self, X, y=None) -> np.ndarray: if not self.is_fitted: raise ValueError( @@ -455,9 +451,16 @@ def _fit_predict(self, X, y=None) -> np.ndarray: pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) + + optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[2])) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) + self.estimator = KMeans( + n_clusters=self.n_clusters, + random_state=self.random_state, + n_init=self.n_init, + ) return self.estimator.fit_predict(transformed_data_pca) @classmethod From 26782b49b21d02afc8538b26ac88b49872395e39 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:58:30 +0400 Subject: [PATCH 083/103] Update __init__.py to fix typo mistake --- aeon/clustering/feature_based/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/__init__.py b/aeon/clustering/feature_based/__init__.py index 0467681248..7aa9214884 100644 --- a/aeon/clustering/feature_based/__init__.py +++ b/aeon/clustering/feature_based/__init__.py @@ -8,10 +8,10 @@ "Catch22Clusterer", "SummaryClusterer", "TSFreshClusterer", - "RCluster", + "RClusterer", ] from aeon.clustering.feature_based._catch22 import Catch22Clusterer -from aeon.clustering.feature_based._r_cluster import RCluster +from aeon.clustering.feature_based._r_cluster import RClusterer from aeon.clustering.feature_based._summary import SummaryClusterer from aeon.clustering.feature_based._tsfresh import TSFreshClusterer From c2556409d97203a8bb1f1a641cfe6bae5ced765b Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:58:58 +0400 Subject: [PATCH 084/103] Updated clustering.rst for docs --- docs/api_reference/clustering.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api_reference/clustering.rst b/docs/api_reference/clustering.rst index d07f86e3c9..d10c9303ed 100644 --- a/docs/api_reference/clustering.rst +++ b/docs/api_reference/clustering.rst @@ -26,7 +26,7 @@ Clustering Algorithms TimeSeriesCLARANS ElasticSOM KSpectralCentroid - RCluster + RClusterer Deep learning ------------- From 31e6f8228447ea3e20bf930e644b085b79ae439a Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 18:59:36 +0000 Subject: [PATCH 085/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index df274b1b1a..c445734a96 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -428,6 +428,7 @@ def _fit(self, X, y=None): n_init=self.n_init, ) self.estimator.fit(transformed_data_pca) + def _predict(self, X, y=None) -> np.ndarray: if not self.is_fitted: raise ValueError( @@ -451,7 +452,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - + optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[2])) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) From 82f874d88907dbe5d96ae038f1da5f6a410056e4 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 23:04:40 +0400 Subject: [PATCH 086/103] Update _r_cluster.py to update docs --- aeon/clustering/feature_based/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index c445734a96..f010ab1a41 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -16,7 +16,7 @@ class RClusterer(BaseClusterer): - """Implementation of Time Series R Cluster + """Implementation of Time Series R Cluster. Adapted from the implementation used in [1]_ From 9340bf6852551cc6eed889bbff975f17de0843d2 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 23:13:32 +0400 Subject: [PATCH 087/103] Update test_r_cluster.py to update test --- aeon/clustering/feature_based/tests/test_r_cluster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py index 4fd806bdba..b08042c3b2 100644 --- a/aeon/clustering/feature_based/tests/test_r_cluster.py +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -2,7 +2,7 @@ import numpy as np -from aeon.clustering.feature_based._r_cluster import RCluster +from aeon.clustering.feature_based._r_cluster import RClusterer from aeon.datasets import load_gunpoint @@ -14,7 +14,7 @@ def test_r_cluster(): X_train = X_train[:num_points] - rcluster = RCluster(random_state=1, n_clusters=2) + rcluster = RClusterer(random_state=1, n_clusters=2) rcluster.fit(X_train) train_result = rcluster.predict(X_train) labs = rcluster.labels_ From 9fe83bfa83de2f864beaabab76941600766486e8 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 23:19:31 +0400 Subject: [PATCH 088/103] Update _r_cluster.py to add self.labels_ --- aeon/clustering/feature_based/_r_cluster.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index f010ab1a41..e3291780e1 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -428,6 +428,7 @@ def _fit(self, X, y=None): n_init=self.n_init, ) self.estimator.fit(transformed_data_pca) + self.labels_ = self.estimator.labels_ def _predict(self, X, y=None) -> np.ndarray: if not self.is_fitted: @@ -462,6 +463,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: random_state=self.random_state, n_init=self.n_init, ) + self.labels_ = self.estimator.labels_ return self.estimator.fit_predict(transformed_data_pca) @classmethod From 7fc607bc2aa89f49c6d44502f835aca9f0abf182 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 23:58:02 +0400 Subject: [PATCH 089/103] Update _r_cluster.py for fising test issue in optimal dimensions --- aeon/clustering/feature_based/_r_cluster.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index e3291780e1..f6cbf8ccb0 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -418,7 +418,7 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[2])) + optimal_dimensions = max(1, min(optimal_dimensions, X_std.shape[0],X_std.shape[1])) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) @@ -454,7 +454,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - optimal_dimensions = max(1, min(optimal_dimensions, X.shape[0], X.shape[2])) + optimal_dimensions = max(1, min(optimal_dimensions, X_std.shape[0],X_std.shape[1])) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) @@ -463,8 +463,9 @@ def _fit_predict(self, X, y=None) -> np.ndarray: random_state=self.random_state, n_init=self.n_init, ) + Y = self.estimator.fit_predict(transformed_data_pca) self.labels_ = self.estimator.labels_ - return self.estimator.fit_predict(transformed_data_pca) + return Y @classmethod def _get_test_params(cls, parameter_set="default") -> dict: From 906b67d6bb663e8dcb93da5180fa71de03070c5b Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 19:58:37 +0000 Subject: [PATCH 090/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index f6cbf8ccb0..5af20847cf 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -418,7 +418,9 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - optimal_dimensions = max(1, min(optimal_dimensions, X_std.shape[0],X_std.shape[1])) + optimal_dimensions = max( + 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) + ) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) @@ -454,7 +456,9 @@ def _fit_predict(self, X, y=None) -> np.ndarray: pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - optimal_dimensions = max(1, min(optimal_dimensions, X_std.shape[0],X_std.shape[1])) + optimal_dimensions = max( + 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) + ) self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) From a980fd0ca54b2b9501e6de57f9e580c7f6cc2df8 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 00:18:45 +0400 Subject: [PATCH 091/103] Update _r_cluster.py --- aeon/clustering/feature_based/_r_cluster.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 5af20847cf..d9d593ecc5 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -422,7 +422,7 @@ def _fit(self, X, y=None): 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + self.__pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, @@ -443,7 +443,7 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) - transformed_data_pca = self.pca.fit_transform(X_std) + transformed_data_pca = self.__pca.fit_transform(X_std) return self.estimator.predict(transformed_data_pca) @@ -460,8 +460,8 @@ def _fit_predict(self, X, y=None) -> np.ndarray: 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) - transformed_data_pca = self.pca.fit_transform(X_std) + self.__pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + transformed_data_pca = self.__pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, From 1fb4d48a936141e0b72920ac898a5552de9a5599 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 20:19:13 +0000 Subject: [PATCH 092/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index d9d593ecc5..a8b6323784 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -422,7 +422,9 @@ def _fit(self, X, y=None): 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.__pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + self.__pca = PCA( + n_components=optimal_dimensions, random_state=self.random_state + ) transformed_data_pca = self.pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, @@ -460,7 +462,9 @@ def _fit_predict(self, X, y=None) -> np.ndarray: 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.__pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) + self.__pca = PCA( + n_components=optimal_dimensions, random_state=self.random_state + ) transformed_data_pca = self.__pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, From 67876a2792edfbb3828a2c15011ecaf29859d6a5 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 00:25:23 +0400 Subject: [PATCH 093/103] Update _r_cluster.py --- aeon/clustering/feature_based/_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index a8b6323784..ce133f52bf 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -425,7 +425,7 @@ def _fit(self, X, y=None): self.__pca = PCA( n_components=optimal_dimensions, random_state=self.random_state ) - transformed_data_pca = self.pca.fit_transform(X_std) + transformed_data_pca = self.__pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, From 663de25d12194b3bc9c84b5e6afab5c69bb659bc Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 23:49:25 +0400 Subject: [PATCH 094/103] to fix pca dimension issues --- aeon/clustering/feature_based/_r_cluster.py | 34 +++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index ce133f52bf..52d9525e33 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -422,10 +422,10 @@ def _fit(self, X, y=None): 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.__pca = PCA( + self.pca = PCA( n_components=optimal_dimensions, random_state=self.random_state ) - transformed_data_pca = self.__pca.fit_transform(X_std) + transformed_data_pca = self.pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, @@ -445,9 +445,27 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) - transformed_data_pca = self.__pca.fit_transform(X_std) + n_samples, n_features = X_std.shape + if self.pca.n_components != min(self.pca.n_components,n_samples,n_features): + pca = PCA().fit(X_std) + optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - return self.estimator.predict(transformed_data_pca) + optimal_dimensions = max( + 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) + ) + pca = PCA( + n_components=optimal_dimensions, random_state=self.random_state + ) + transformed_data_pca = pca.fit_transform(X_std) + estimator = KMeans( + n_clusters=self.n_clusters, + random_state=self.random_state, + n_init=self.n_init, + ) + return estimator.fit_predict(transformed_data_pca) + else: + transformed_data_pca = self.pca.fit_transform(X_std) + return self.estimator.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X) @@ -461,11 +479,10 @@ def _fit_predict(self, X, y=None) -> np.ndarray: optimal_dimensions = max( 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - - self.__pca = PCA( + self.pca = PCA( n_components=optimal_dimensions, random_state=self.random_state ) - transformed_data_pca = self.__pca.fit_transform(X_std) + transformed_data_pca = self.pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, @@ -493,8 +510,5 @@ def _get_test_params(cls, parameter_set="default") -> dict: `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ return { - "n_kernels": 84, - "max_dilations_per_kernel": 32, - "n_clusters": 8, "random_state": 1, } From c6b26581667bb7094afcc4192c3e94060ac9d8c4 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 19:50:04 +0000 Subject: [PATCH 095/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/_r_cluster.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 52d9525e33..95aedafe1d 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -422,9 +422,7 @@ def _fit(self, X, y=None): 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.pca = PCA( - n_components=optimal_dimensions, random_state=self.random_state - ) + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, @@ -446,23 +444,21 @@ def _predict(self, X, y=None) -> np.ndarray: X_std = self.scaler.fit_transform(transformed_data) n_samples, n_features = X_std.shape - if self.pca.n_components != min(self.pca.n_components,n_samples,n_features): + if self.pca.n_components != min(self.pca.n_components, n_samples, n_features): pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) optimal_dimensions = max( 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - pca = PCA( - n_components=optimal_dimensions, random_state=self.random_state - ) + pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = pca.fit_transform(X_std) estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, n_init=self.n_init, ) - return estimator.fit_predict(transformed_data_pca) + return estimator.fit_predict(transformed_data_pca) else: transformed_data_pca = self.pca.fit_transform(X_std) return self.estimator.predict(transformed_data_pca) @@ -479,9 +475,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: optimal_dimensions = max( 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) - self.pca = PCA( - n_components=optimal_dimensions, random_state=self.random_state - ) + self.pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = self.pca.fit_transform(X_std) self.estimator = KMeans( n_clusters=self.n_clusters, From c0880f36cb0d4110ba19d4cbf418828c63757674 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 23:58:20 +0400 Subject: [PATCH 096/103] to fix test case num cluster --- aeon/clustering/feature_based/_r_cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 95aedafe1d..a6a1510005 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -504,5 +504,6 @@ def _get_test_params(cls, parameter_set="default") -> dict: `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ return { + "n_clusters": 2, "random_state": 1, } From b387cae82971ed73fee3b5d48c83a70fe3cb8b75 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 21 Dec 2024 02:46:24 +0400 Subject: [PATCH 097/103] updated test case for RClusterer --- .../feature_based/tests/test_r_cluster.py | 48 ++++++++++++++----- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py index b08042c3b2..21d39e638c 100644 --- a/aeon/clustering/feature_based/tests/test_r_cluster.py +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -1,21 +1,43 @@ """Test For RCluster.""" import numpy as np - +import pytest from aeon.clustering.feature_based._r_cluster import RClusterer -from aeon.datasets import load_gunpoint +from sklearn import metrics +from aeon.utils.validation._dependencies import _check_estimator_deps +X_ = [[1.5980065 , 1.5994389 , 1.5705293, 1.5504735, 1.507371, 1.4343414, + 1.3689859, 1.3052934, 1.2103053, 1.1166533 ], + [1.7011456 , 1.670645, 1.6188844, 1.5468045 , 1.4754685, 1.3912091, + 1.3058823, 1.237313, 1.1534138, 1.0696899 ], + [1.722342, 1.6953288, 1.656946, 1.6063123, 1.5118241, 1.4141477, + 1.3136877, 1.2132338, 1.1129779, 1.0150805 ], + [1.7262632, 1.659836, 1.5731083, 1.4962643, 1.4090704, 1.3324426, + 1.2457422 , 1.1588819, 1.0733612, 0.9871649 ], + [1.7789757 , 1.7612025, 1.7030841, 1.610572, 1.4920881, 1.3686543, + 1.2447608 , 1.1209 , 1.0107619, 0.9001682 ], + [1.7996215, 1.7427012, 1.6864861, 1.6326717, 1.5324101, 1.4225861, + 1.3113219, 1.2012383, 1.0899248, 0.9785759 ], + [1.7490938, 1.7266423, 1.6593817, 1.5595723, 1.4572895, 1.355191, + 1.2521086, 1.1618543, 1.0623266, 0.9609945 ], + [1.3476895, 1.2373582, 1.1288056, 1.0218658, 0.9392247, 0.84710395, + 0.75024295, 0.65884495, 0.56604975, 0.4741342 ], + [1.6956215, 1.633777, 1.5959885, 1.5069915, 1.4142802, 1.3230939, + 1.2419277, 1.1857506, 1.1216865, 1.0483568 ], + [1.722719, 1.7132868, 1.6652519, 1.586769, 1.4954436, 1.4038439, + 1.3122748, 1.2204062, 1.1295636, 1.0408053 ]] +Y = ['22', '28', '21', '15', '2', '18', '21', '36', '11', '21'] +@pytest.mark.skipif( + not _check_estimator_deps(RClusterer, severity="none"), + reason="skip test if required soft dependencies not available", +) def test_r_cluster(): """Test implementation of RCluster.""" - X_train, y_train = load_gunpoint(split="train") - - num_points = 20 - - X_train = X_train[:num_points] - - rcluster = RClusterer(random_state=1, n_clusters=2) - rcluster.fit(X_train) - train_result = rcluster.predict(X_train) - labs = rcluster.labels_ - assert np.array_equal(labs, train_result) + X_train = np.array(X_) + num_clusters = len(np.unique(Y)) + X = np.expand_dims(X_train, axis=1) + Rcluster = RClusterer(n_clusters=num_clusters, n_init=10) + labels_pred1 = Rcluster.fit_predict(X) + score = metrics.adjusted_rand_score(labels_true=Y, labels_pred=labels_pred1) + assert score > 0.36 From ba3f7ccb0c19e50d897201f08dfdc24074e038cf Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 22:46:58 +0000 Subject: [PATCH 098/103] Automatic `pre-commit` fixes --- .../feature_based/tests/test_r_cluster.py | 148 +++++++++++++++--- 1 file changed, 126 insertions(+), 22 deletions(-) diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py index 21d39e638c..eaf412d732 100644 --- a/aeon/clustering/feature_based/tests/test_r_cluster.py +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -2,31 +2,135 @@ import numpy as np import pytest -from aeon.clustering.feature_based._r_cluster import RClusterer from sklearn import metrics + +from aeon.clustering.feature_based._r_cluster import RClusterer from aeon.utils.validation._dependencies import _check_estimator_deps -X_ = [[1.5980065 , 1.5994389 , 1.5705293, 1.5504735, 1.507371, 1.4343414, - 1.3689859, 1.3052934, 1.2103053, 1.1166533 ], - [1.7011456 , 1.670645, 1.6188844, 1.5468045 , 1.4754685, 1.3912091, - 1.3058823, 1.237313, 1.1534138, 1.0696899 ], - [1.722342, 1.6953288, 1.656946, 1.6063123, 1.5118241, 1.4141477, - 1.3136877, 1.2132338, 1.1129779, 1.0150805 ], - [1.7262632, 1.659836, 1.5731083, 1.4962643, 1.4090704, 1.3324426, - 1.2457422 , 1.1588819, 1.0733612, 0.9871649 ], - [1.7789757 , 1.7612025, 1.7030841, 1.610572, 1.4920881, 1.3686543, - 1.2447608 , 1.1209 , 1.0107619, 0.9001682 ], - [1.7996215, 1.7427012, 1.6864861, 1.6326717, 1.5324101, 1.4225861, - 1.3113219, 1.2012383, 1.0899248, 0.9785759 ], - [1.7490938, 1.7266423, 1.6593817, 1.5595723, 1.4572895, 1.355191, - 1.2521086, 1.1618543, 1.0623266, 0.9609945 ], - [1.3476895, 1.2373582, 1.1288056, 1.0218658, 0.9392247, 0.84710395, - 0.75024295, 0.65884495, 0.56604975, 0.4741342 ], - [1.6956215, 1.633777, 1.5959885, 1.5069915, 1.4142802, 1.3230939, - 1.2419277, 1.1857506, 1.1216865, 1.0483568 ], - [1.722719, 1.7132868, 1.6652519, 1.586769, 1.4954436, 1.4038439, - 1.3122748, 1.2204062, 1.1295636, 1.0408053 ]] -Y = ['22', '28', '21', '15', '2', '18', '21', '36', '11', '21'] +X_ = [ + [ + 1.5980065, + 1.5994389, + 1.5705293, + 1.5504735, + 1.507371, + 1.4343414, + 1.3689859, + 1.3052934, + 1.2103053, + 1.1166533, + ], + [ + 1.7011456, + 1.670645, + 1.6188844, + 1.5468045, + 1.4754685, + 1.3912091, + 1.3058823, + 1.237313, + 1.1534138, + 1.0696899, + ], + [ + 1.722342, + 1.6953288, + 1.656946, + 1.6063123, + 1.5118241, + 1.4141477, + 1.3136877, + 1.2132338, + 1.1129779, + 1.0150805, + ], + [ + 1.7262632, + 1.659836, + 1.5731083, + 1.4962643, + 1.4090704, + 1.3324426, + 1.2457422, + 1.1588819, + 1.0733612, + 0.9871649, + ], + [ + 1.7789757, + 1.7612025, + 1.7030841, + 1.610572, + 1.4920881, + 1.3686543, + 1.2447608, + 1.1209, + 1.0107619, + 0.9001682, + ], + [ + 1.7996215, + 1.7427012, + 1.6864861, + 1.6326717, + 1.5324101, + 1.4225861, + 1.3113219, + 1.2012383, + 1.0899248, + 0.9785759, + ], + [ + 1.7490938, + 1.7266423, + 1.6593817, + 1.5595723, + 1.4572895, + 1.355191, + 1.2521086, + 1.1618543, + 1.0623266, + 0.9609945, + ], + [ + 1.3476895, + 1.2373582, + 1.1288056, + 1.0218658, + 0.9392247, + 0.84710395, + 0.75024295, + 0.65884495, + 0.56604975, + 0.4741342, + ], + [ + 1.6956215, + 1.633777, + 1.5959885, + 1.5069915, + 1.4142802, + 1.3230939, + 1.2419277, + 1.1857506, + 1.1216865, + 1.0483568, + ], + [ + 1.722719, + 1.7132868, + 1.6652519, + 1.586769, + 1.4954436, + 1.4038439, + 1.3122748, + 1.2204062, + 1.1295636, + 1.0408053, + ], +] +Y = ["22", "28", "21", "15", "2", "18", "21", "36", "11", "21"] + @pytest.mark.skipif( not _check_estimator_deps(RClusterer, severity="none"), From 73d8b72e0c97105793c1bb683253734e74dba6a4 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 21 Dec 2024 03:01:51 +0400 Subject: [PATCH 099/103] updated test case --- aeon/clustering/feature_based/tests/test_r_cluster.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py index eaf412d732..21c08bd8e9 100644 --- a/aeon/clustering/feature_based/tests/test_r_cluster.py +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -139,9 +139,8 @@ def test_r_cluster(): """Test implementation of RCluster.""" X_train = np.array(X_) - num_clusters = len(np.unique(Y)) X = np.expand_dims(X_train, axis=1) - Rcluster = RClusterer(n_clusters=num_clusters, n_init=10) + Rcluster = RClusterer(n_clusters=8, n_init=10,random_state=1) labels_pred1 = Rcluster.fit_predict(X) score = metrics.adjusted_rand_score(labels_true=Y, labels_pred=labels_pred1) assert score > 0.36 From 4c120314cc43fbff64b3ba9ffafee4b320fb0921 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 23:02:23 +0000 Subject: [PATCH 100/103] Automatic `pre-commit` fixes --- aeon/clustering/feature_based/tests/test_r_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py index 21c08bd8e9..ff8fcd2372 100644 --- a/aeon/clustering/feature_based/tests/test_r_cluster.py +++ b/aeon/clustering/feature_based/tests/test_r_cluster.py @@ -140,7 +140,7 @@ def test_r_cluster(): """Test implementation of RCluster.""" X_train = np.array(X_) X = np.expand_dims(X_train, axis=1) - Rcluster = RClusterer(n_clusters=8, n_init=10,random_state=1) + Rcluster = RClusterer(n_clusters=8, n_init=10, random_state=1) labels_pred1 = Rcluster.fit_predict(X) score = metrics.adjusted_rand_score(labels_true=Y, labels_pred=labels_pred1) assert score > 0.36 From 33670467b420cdfda7a9f39903260026be6540aa Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 9 Jan 2025 22:29:20 +0400 Subject: [PATCH 101/103] Update _r_cluster.py to fix docs string --- aeon/clustering/feature_based/_r_cluster.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index a6a1510005..3f3f157671 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -33,17 +33,17 @@ class RClusterer(BaseClusterer): over the time series,capturing patterns at varying scales num_cluster : int , default = 8 The number of clusters used - random_state : int, Random state or None, default=None - n_jobs : int, default=1 - The number of jobs to run in parallel for `transform`. ``-1`` - means using all - processors. num_features : int, default=500 Number of features need for fit_dilations method. n_init : int, default=10 Number of times the R-Cluster algorithm will be run with different centroid seeds. The final result will be the best output of n_init consecutive runs in terms of inertia. + random_state : int, Random state or None, default=None + n_jobs : int, default=1 + The number of jobs to run in parallel for `transform`. ``-1`` + means using all + processors. Notes ----- From 80421e418e187d837b385cb2d7687ab60f38740a Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 9 Jan 2025 22:29:47 +0400 Subject: [PATCH 102/103] Update _r_cluster.py to update random state --- aeon/clustering/feature_based/_r_cluster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 3f3f157671..495260a6a7 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -505,5 +505,4 @@ def _get_test_params(cls, parameter_set="default") -> dict: """ return { "n_clusters": 2, - "random_state": 1, } From 22762efcbfe8166bb6e3a2501dead1e24f687325 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 12 Jan 2025 13:04:53 +0400 Subject: [PATCH 103/103] Updated _r_cluster.py to update predict --- aeon/clustering/feature_based/_r_cluster.py | 25 ++------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index 495260a6a7..24cbc03a9d 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -433,35 +433,14 @@ def _fit(self, X, y=None): self.labels_ = self.estimator.labels_ def _predict(self, X, y=None) -> np.ndarray: - if not self.is_fitted: - raise ValueError( - "Data is not fitted. Please fit the model before using it." - ) - parameters = self._get_parameterised_data(X) transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) - n_samples, n_features = X_std.shape - if self.pca.n_components != min(self.pca.n_components, n_samples, n_features): - pca = PCA().fit(X_std) - optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - optimal_dimensions = max( - 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) - ) - pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) - transformed_data_pca = pca.fit_transform(X_std) - estimator = KMeans( - n_clusters=self.n_clusters, - random_state=self.random_state, - n_init=self.n_init, - ) - return estimator.fit_predict(transformed_data_pca) - else: - transformed_data_pca = self.pca.fit_transform(X_std) - return self.estimator.predict(transformed_data_pca) + transformed_data_pca = self.pca.fit_transform(X_std) + return self.estimator.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X)