From e9795f1b7aff03d3bec2e32e3c643a6f945de7ee Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Thu, 19 Dec 2024 14:48:51 +0400 Subject: [PATCH 01/42] =?UTF-8?q?Added=20IDK=C2=B2=20and=20s-IDK=C2=B2=20a?= =?UTF-8?q?nomaly=20detector=20to=20aeon?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- aeon/anomaly_detection/_idk.py | 152 +++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 aeon/anomaly_detection/_idk.py diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py new file mode 100644 index 0000000000..678fbebb7e --- /dev/null +++ b/aeon/anomaly_detection/_idk.py @@ -0,0 +1,152 @@ +"""IDK² and s-IDK² anomaly detector.""" + +import random +import numpy as np +from aeon.anomaly_detection.base import BaseAnomalyDetector + +class IDK(BaseAnomalyDetector): + """IDK² and s-IDK² anomaly detector. + + The Isolation Distributional Kernel (IDK) is a data-dependent kernel for efficient + anomaly detection, improving accuracy without explicit learning. Its extension, + IDK², simplifies group anomaly detection, outperforming traditional methods in + speed and effectiveness. + + .. list-table:: Capabilities + :stub-columns: 1 + + * - Input data format + - univariate + * - Output data format + - anomaly scores + * - Learning Type + - unsupervised + + Parameters + ---------- + psi1 : int + Number of samples randomly selected in each iteration for the feature map matrix. + psi2 : int + Number of samples used for the second-stage feature map embedding. + width : int + Size of the sliding or fixed-width window for anomaly detection. + t : int, default=100 + Number of iterations (time steps) for random sampling to construct feature maps. + sliding : bool, default=False + Whether to use a sliding window approach. If True, computes scores for sliding windows; + otherwise, processes fixed-width segments. + Notes + ----- + This implementation is inspired by the Isolation Distributional Kernel (IDK) + approach as detailed in [1]_. + The code is adapted from the open-source repository [2]_. + + References + ---------- + [1]Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. + DOI: https://dl.acm.org/doi/10.1145/3394486.3403062 + + [2] GitHub Repository: + IsolationKernel/Codes: IDK Implementation for Time Series Data + URL: https://github.com/IsolationKernel/Codes/tree/main/IDK/TS + """ + + _tags = { + "capability:univariate": True, + "capability:multivariate": False, + "capability:missing_values": False, + } + def __init__( + self, + psi1, + psi2, + width, + t=100, + sliding = False, + ): + self.psi1 = psi1 + self.psi2 = psi2 + self.width = width + self.t = t + self.sliding = sliding + super().__init__(axis=0) + + def __IK_inne_fm(self,X, psi, t=100): + onepoint_matrix = np.zeros((X.shape[0], (int)(t * psi)), dtype=int) + for time in range(t): + sample_num = psi # + sample_list = [p for p in range(len(X))] + sample_list = random.sample(sample_list, sample_num) + sample = X[sample_list, :] + + tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) # n*psi + tem2 = np.dot(np.ones(X.shape), np.square(sample.T)) + point2sample = tem1 + tem2 - 2 * np.dot(X, sample.T) # n*psi + + sample2sample = point2sample[sample_list, :] + row, col = np.diag_indices_from(sample2sample) + sample2sample[row, col] = 99999999 + radius_list = np.min(sample2sample, axis=1) + + min_point2sample_index = np.argmin(point2sample, axis=1) + min_dist_point2sample = min_point2sample_index + time * psi + point2sample_value = point2sample[range(len(onepoint_matrix)), min_point2sample_index] + ind = point2sample_value < radius_list[min_point2sample_index] + onepoint_matrix[ind, min_dist_point2sample[ind]] = 1 + + return onepoint_matrix + + def __IDK(self,X, psi, t=100): + point_fm_list = self.__IK_inne_fm(X=X, psi=psi, t=t) + feature_mean_map = np.mean(point_fm_list, axis=0) + return np.dot(point_fm_list, feature_mean_map) / t + + def _IDK_T(self,X): + window_num = int(np.ceil(X.shape[0] / self.width)) + featuremap_count = np.zeros((window_num, self.t *self.psi1)) + onepoint_matrix = np.full((X.shape[0], self.t), -1) + + for time in range(self.t): + sample_num = self.psi1 + sample_list = [p for p in range(X.shape[0])] + sample_list = random.sample(sample_list, sample_num) + sample = X[sample_list, :] + tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) # n*psi + tem2 = np.dot(np.ones(X.shape), np.square(sample.T)) + point2sample = tem1 + tem2 - 2 * np.dot(X, sample.T) # n*psi + + sample2sample = point2sample[sample_list, :] + row, col = np.diag_indices_from(sample2sample) + sample2sample[row, col] = 99999999 + + radius_list = np.min(sample2sample, axis=1) + min_dist_point2sample = np.argmin(point2sample, axis=1) # index + + for i in range(X.shape[0]): + if point2sample[i][min_dist_point2sample[i]] < radius_list[min_dist_point2sample[i]]: + onepoint_matrix[i][time] = min_dist_point2sample[i] + time * self.psi1 + featuremap_count[(int)(i / self.width)][onepoint_matrix[i][time]] += 1 + + + for i in range((int)(X.shape[0] / self.width)): + featuremap_count[i] /= self.width + isextra = X.shape[0] - (int)(X.shape[0] / self.width) * self.width + if isextra > 0: + featuremap_count[-1] /= isextra + + if isextra > 0: + featuremap_count = np.delete(featuremap_count, [featuremap_count.shape[0] - 1], axis=0) + + return self.__IDK(featuremap_count, psi=self.psi2, t=self.t) + def _IDK_square_sliding(self,X): + point_fm_list = self.__IK_inne_fm(X=X, psi=self.psi1, t=self.t) + point_fm_list=np.insert(point_fm_list, 0, 0, axis=0) + cumsum=np.cumsum(point_fm_list,axis=0) + + subsequence_fm_list=(cumsum[self.width:]-cumsum[:-self.width])/float(self.width) + + return self.__IDK(X=subsequence_fm_list, psi=self.psi2, t=self.t) + def _predict(self,X): + if self.sliding: + return self._IDK_square_sliding(X) + return self._IDK_T(X) \ No newline at end of file From ff4b576aa6bd149ad96a349b3840c90894c1cc57 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Thu, 19 Dec 2024 14:51:25 +0400 Subject: [PATCH 02/42] Added IDK to init --- aeon/anomaly_detection/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py index 8ccd9163c3..61d8df6e86 100644 --- a/aeon/anomaly_detection/__init__.py +++ b/aeon/anomaly_detection/__init__.py @@ -13,6 +13,7 @@ "PyODAdapter", "STOMP", "STRAY", + "IDK", ] from aeon.anomaly_detection._cblof import CBLOF @@ -27,3 +28,4 @@ from aeon.anomaly_detection._pyodadapter import PyODAdapter from aeon.anomaly_detection._stomp import STOMP from aeon.anomaly_detection._stray import STRAY +from aeon.anomaly_detection._idk import IDK \ No newline at end of file From 7709a7dd17fdf183db825dcbd9a4a500cfc43516 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Thu, 19 Dec 2024 14:53:04 +0400 Subject: [PATCH 03/42] Added IDK to docs --- docs/api_reference/anomaly_detection.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst index 7db535a9be..fba5b98f57 100644 --- a/docs/api_reference/anomaly_detection.rst +++ b/docs/api_reference/anomaly_detection.rst @@ -34,6 +34,7 @@ Detectors PyODAdapter STOMP STRAY + IDK Base ---- From 7f2916f754d741cd51bc14206c1fd6c5986836bf Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 11:18:31 +0000 Subject: [PATCH 04/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/__init__.py | 2 +- aeon/anomaly_detection/_idk.py | 151 ++++++++++++++++------------- 2 files changed, 86 insertions(+), 67 deletions(-) diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py index 61d8df6e86..4ef6e5fedc 100644 --- a/aeon/anomaly_detection/__init__.py +++ b/aeon/anomaly_detection/__init__.py @@ -19,6 +19,7 @@ from aeon.anomaly_detection._cblof import CBLOF from aeon.anomaly_detection._copod import COPOD from aeon.anomaly_detection._dwt_mlead import DWT_MLEAD +from aeon.anomaly_detection._idk import IDK from aeon.anomaly_detection._iforest import IsolationForest from aeon.anomaly_detection._kmeans import KMeansAD from aeon.anomaly_detection._left_stampi import LeftSTAMPi @@ -28,4 +29,3 @@ from aeon.anomaly_detection._pyodadapter import PyODAdapter from aeon.anomaly_detection._stomp import STOMP from aeon.anomaly_detection._stray import STRAY -from aeon.anomaly_detection._idk import IDK \ No newline at end of file diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 678fbebb7e..709ceae6ae 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -1,77 +1,82 @@ """IDK² and s-IDK² anomaly detector.""" import random + import numpy as np + from aeon.anomaly_detection.base import BaseAnomalyDetector + class IDK(BaseAnomalyDetector): """IDK² and s-IDK² anomaly detector. - The Isolation Distributional Kernel (IDK) is a data-dependent kernel for efficient - anomaly detection, improving accuracy without explicit learning. Its extension, - IDK², simplifies group anomaly detection, outperforming traditional methods in - speed and effectiveness. - - .. list-table:: Capabilities - :stub-columns: 1 - - * - Input data format - - univariate - * - Output data format - - anomaly scores - * - Learning Type - - unsupervised - - Parameters - ---------- - psi1 : int - Number of samples randomly selected in each iteration for the feature map matrix. - psi2 : int - Number of samples used for the second-stage feature map embedding. - width : int - Size of the sliding or fixed-width window for anomaly detection. - t : int, default=100 - Number of iterations (time steps) for random sampling to construct feature maps. - sliding : bool, default=False - Whether to use a sliding window approach. If True, computes scores for sliding windows; - otherwise, processes fixed-width segments. - Notes - ----- - This implementation is inspired by the Isolation Distributional Kernel (IDK) - approach as detailed in [1]_. - The code is adapted from the open-source repository [2]_. - - References - ---------- - [1]Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. - DOI: https://dl.acm.org/doi/10.1145/3394486.3403062 - - [2] GitHub Repository: - IsolationKernel/Codes: IDK Implementation for Time Series Data - URL: https://github.com/IsolationKernel/Codes/tree/main/IDK/TS - """ + The Isolation Distributional Kernel (IDK) is a data-dependent kernel for efficient + anomaly detection, improving accuracy without explicit learning. Its extension, + IDK², simplifies group anomaly detection, outperforming traditional methods in + speed and effectiveness. + + .. list-table:: Capabilities + :stub-columns: 1 + + * - Input data format + - univariate + * - Output data format + - anomaly scores + * - Learning Type + - unsupervised + + Parameters + ---------- + psi1 : int + Number of samples randomly selected in each iteration for the feature map matrix. + psi2 : int + Number of samples used for the second-stage feature map embedding. + width : int + Size of the sliding or fixed-width window for anomaly detection. + t : int, default=100 + Number of iterations (time steps) for random sampling to construct feature maps. + sliding : bool, default=False + Whether to use a sliding window approach. If True, computes scores for sliding windows; + otherwise, processes fixed-width segments. + + Notes + ----- + This implementation is inspired by the Isolation Distributional Kernel (IDK) + approach as detailed in [1]_. + The code is adapted from the open-source repository [2]_. + + References + ---------- + [1]Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. + DOI: https://dl.acm.org/doi/10.1145/3394486.3403062 + + [2] GitHub Repository: + IsolationKernel/Codes: IDK Implementation for Time Series Data + URL: https://github.com/IsolationKernel/Codes/tree/main/IDK/TS + """ _tags = { "capability:univariate": True, "capability:multivariate": False, "capability:missing_values": False, } + def __init__( - self, - psi1, - psi2, - width, - t=100, - sliding = False, + self, + psi1, + psi2, + width, + t=100, + sliding=False, ): self.psi1 = psi1 self.psi2 = psi2 self.width = width self.t = t - self.sliding = sliding + self.sliding = sliding super().__init__(axis=0) - def __IK_inne_fm(self,X, psi, t=100): + def __IK_inne_fm(self, X, psi, t=100): onepoint_matrix = np.zeros((X.shape[0], (int)(t * psi)), dtype=int) for time in range(t): sample_num = psi # @@ -90,20 +95,22 @@ def __IK_inne_fm(self,X, psi, t=100): min_point2sample_index = np.argmin(point2sample, axis=1) min_dist_point2sample = min_point2sample_index + time * psi - point2sample_value = point2sample[range(len(onepoint_matrix)), min_point2sample_index] + point2sample_value = point2sample[ + range(len(onepoint_matrix)), min_point2sample_index + ] ind = point2sample_value < radius_list[min_point2sample_index] onepoint_matrix[ind, min_dist_point2sample[ind]] = 1 return onepoint_matrix - def __IDK(self,X, psi, t=100): + def __IDK(self, X, psi, t=100): point_fm_list = self.__IK_inne_fm(X=X, psi=psi, t=t) feature_mean_map = np.mean(point_fm_list, axis=0) return np.dot(point_fm_list, feature_mean_map) / t - def _IDK_T(self,X): + def _IDK_T(self, X): window_num = int(np.ceil(X.shape[0] / self.width)) - featuremap_count = np.zeros((window_num, self.t *self.psi1)) + featuremap_count = np.zeros((window_num, self.t * self.psi1)) onepoint_matrix = np.full((X.shape[0], self.t), -1) for time in range(self.t): @@ -123,10 +130,16 @@ def _IDK_T(self,X): min_dist_point2sample = np.argmin(point2sample, axis=1) # index for i in range(X.shape[0]): - if point2sample[i][min_dist_point2sample[i]] < radius_list[min_dist_point2sample[i]]: - onepoint_matrix[i][time] = min_dist_point2sample[i] + time * self.psi1 - featuremap_count[(int)(i / self.width)][onepoint_matrix[i][time]] += 1 - + if ( + point2sample[i][min_dist_point2sample[i]] + < radius_list[min_dist_point2sample[i]] + ): + onepoint_matrix[i][time] = ( + min_dist_point2sample[i] + time * self.psi1 + ) + featuremap_count[(int)(i / self.width)][ + onepoint_matrix[i][time] + ] += 1 for i in range((int)(X.shape[0] / self.width)): featuremap_count[i] /= self.width @@ -135,18 +148,24 @@ def _IDK_T(self,X): featuremap_count[-1] /= isextra if isextra > 0: - featuremap_count = np.delete(featuremap_count, [featuremap_count.shape[0] - 1], axis=0) + featuremap_count = np.delete( + featuremap_count, [featuremap_count.shape[0] - 1], axis=0 + ) return self.__IDK(featuremap_count, psi=self.psi2, t=self.t) - def _IDK_square_sliding(self,X): + + def _IDK_square_sliding(self, X): point_fm_list = self.__IK_inne_fm(X=X, psi=self.psi1, t=self.t) - point_fm_list=np.insert(point_fm_list, 0, 0, axis=0) - cumsum=np.cumsum(point_fm_list,axis=0) + point_fm_list = np.insert(point_fm_list, 0, 0, axis=0) + cumsum = np.cumsum(point_fm_list, axis=0) - subsequence_fm_list=(cumsum[self.width:]-cumsum[:-self.width])/float(self.width) + subsequence_fm_list = (cumsum[self.width :] - cumsum[: -self.width]) / float( + self.width + ) return self.__IDK(X=subsequence_fm_list, psi=self.psi2, t=self.t) - def _predict(self,X): + + def _predict(self, X): if self.sliding: return self._IDK_square_sliding(X) - return self._IDK_T(X) \ No newline at end of file + return self._IDK_T(X) From 18516df2eeafb9971cca5d0907c37a4e6399b6f2 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:21:01 +0400 Subject: [PATCH 05/42] Update _idk.py to update docs --- aeon/anomaly_detection/_idk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 709ceae6ae..043e63fd34 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -28,15 +28,18 @@ class IDK(BaseAnomalyDetector): Parameters ---------- psi1 : int - Number of samples randomly selected in each iteration for the feature map matrix. + Number of samples randomly selected in each iteration for the feature + map matrix. psi2 : int Number of samples used for the second-stage feature map embedding. width : int Size of the sliding or fixed-width window for anomaly detection. t : int, default=100 - Number of iterations (time steps) for random sampling to construct feature maps. + Number of iterations (time steps) for random sampling to construct + feature maps. sliding : bool, default=False - Whether to use a sliding window approach. If True, computes scores for sliding windows; + Whether to use a sliding window approach. If True, computes scores + for sliding windows; otherwise, processes fixed-width segments. Notes From dd36f8b2916b16c59438ad674706b0f0db2fbeb6 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 11:21:36 +0000 Subject: [PATCH 06/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 043e63fd34..9d1ae001b6 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -28,17 +28,17 @@ class IDK(BaseAnomalyDetector): Parameters ---------- psi1 : int - Number of samples randomly selected in each iteration for the feature + Number of samples randomly selected in each iteration for the feature map matrix. psi2 : int Number of samples used for the second-stage feature map embedding. width : int Size of the sliding or fixed-width window for anomaly detection. t : int, default=100 - Number of iterations (time steps) for random sampling to construct + Number of iterations (time steps) for random sampling to construct feature maps. sliding : bool, default=False - Whether to use a sliding window approach. If True, computes scores + Whether to use a sliding window approach. If True, computes scores for sliding windows; otherwise, processes fixed-width segments. From 6734468c434e752eed86bef704d40272606f4591 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:30:12 +0400 Subject: [PATCH 07/42] Update _idk.py to add get test param --- aeon/anomaly_detection/_idk.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 9d1ae001b6..48cabb1ef0 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -172,3 +172,25 @@ def _predict(self, X): if self.sliding: return self._IDK_square_sliding(X) return self._IDK_T(X) + @classmethod + def _get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + return { + "psi1": 16, + "psi2": 2, + "width": 100, + } From b46a6fb3481104e2e876962a6d1dedb173fd0b1a Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 11:30:53 +0000 Subject: [PATCH 08/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 48cabb1ef0..466c67b883 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -172,6 +172,7 @@ def _predict(self, X): if self.sliding: return self._IDK_square_sliding(X) return self._IDK_T(X) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. From ee813137d363e9d3e5b9788656a89846c3196b4d Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:40:14 +0400 Subject: [PATCH 09/42] Update _idk.py to update axis --- aeon/anomaly_detection/_idk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 466c67b883..29e47b70bc 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -60,7 +60,7 @@ class IDK(BaseAnomalyDetector): _tags = { "capability:univariate": True, - "capability:multivariate": False, + "capability:multivariate": True, "capability:missing_values": False, } @@ -77,7 +77,7 @@ def __init__( self.width = width self.t = t self.sliding = sliding - super().__init__(axis=0) + super().__init__(axis=1) def __IK_inne_fm(self, X, psi, t=100): onepoint_matrix = np.zeros((X.shape[0], (int)(t * psi)), dtype=int) From 4de22ffff122a2158418ccb37b3d8d7ec2bd4352 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:54:31 +0400 Subject: [PATCH 10/42] Update _idk.py to remove univariate --- aeon/anomaly_detection/_idk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 29e47b70bc..094ace0959 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -59,7 +59,7 @@ class IDK(BaseAnomalyDetector): """ _tags = { - "capability:univariate": True, + "capability:univariate": False, "capability:multivariate": True, "capability:missing_values": False, } From c7f057a206b22b419a8a6aeb0b98007f49a87727 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:05:21 +0400 Subject: [PATCH 11/42] Update _idk.py changed axis --- aeon/anomaly_detection/_idk.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 094ace0959..735abf2c2f 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -59,9 +59,10 @@ class IDK(BaseAnomalyDetector): """ _tags = { - "capability:univariate": False, - "capability:multivariate": True, + "capability:univariate": True, + "capability:multivariate": False, "capability:missing_values": False, + "fit_is_empty": False, } def __init__( @@ -77,7 +78,7 @@ def __init__( self.width = width self.t = t self.sliding = sliding - super().__init__(axis=1) + super().__init__(axis=0) def __IK_inne_fm(self, X, psi, t=100): onepoint_matrix = np.zeros((X.shape[0], (int)(t * psi)), dtype=int) From c77f55601ea8567ca956cd5520e9668bdd49f4e7 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:44:33 +0400 Subject: [PATCH 12/42] Update _idk.py to make test_param small --- aeon/anomaly_detection/_idk.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 735abf2c2f..2f105f6c60 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -62,7 +62,6 @@ class IDK(BaseAnomalyDetector): "capability:univariate": True, "capability:multivariate": False, "capability:missing_values": False, - "fit_is_empty": False, } def __init__( @@ -192,7 +191,7 @@ def _get_test_params(cls, parameter_set="default"): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ return { - "psi1": 16, - "psi2": 2, - "width": 100, + "psi1": 2, + "psi2": 1, + "width": 10, } From 6d8467dd70a84cdc722e1bd2bacf3761305fc64c Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:00:09 +0400 Subject: [PATCH 13/42] Update _idk.py change width of test case to 1 --- aeon/anomaly_detection/_idk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 2f105f6c60..3ec3b2fe7c 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -193,5 +193,5 @@ def _get_test_params(cls, parameter_set="default"): return { "psi1": 2, "psi2": 1, - "width": 10, + "width": 1, } From 4faa551c7ba76c7d4d1d17d57917074032755431 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:04:48 +0400 Subject: [PATCH 14/42] Update _idk.py changes psi1 and psi2 test values --- aeon/anomaly_detection/_idk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 3ec3b2fe7c..951656693b 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -191,7 +191,7 @@ def _get_test_params(cls, parameter_set="default"): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ return { - "psi1": 2, - "psi2": 1, + "psi1": 4, + "psi2": 2, "width": 1, } From af6ea0430c479907d4763f2fdcc74d4b61d586fb Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:45:36 +0400 Subject: [PATCH 15/42] Update _idk.py to add extra random_state --- aeon/anomaly_detection/_idk.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 951656693b..f6a57d46da 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -41,6 +41,7 @@ class IDK(BaseAnomalyDetector): Whether to use a sliding window approach. If True, computes scores for sliding windows; otherwise, processes fixed-width segments. + random_state : int, Random state or None, default=None Notes ----- @@ -65,21 +66,25 @@ class IDK(BaseAnomalyDetector): } def __init__( - self, - psi1, - psi2, - width, - t=100, - sliding=False, + self, + psi1, + psi2, + width, + t=100, + sliding = False, + random_state=None, ): self.psi1 = psi1 self.psi2 = psi2 self.width = width self.t = t - self.sliding = sliding + self.sliding = sliding + self.random_state = random_state super().__init__(axis=0) def __IK_inne_fm(self, X, psi, t=100): + np.random.seed(self.random_state) + random.seed(self.random_state) onepoint_matrix = np.zeros((X.shape[0], (int)(t * psi)), dtype=int) for time in range(t): sample_num = psi # @@ -112,6 +117,8 @@ def __IDK(self, X, psi, t=100): return np.dot(point_fm_list, feature_mean_map) / t def _IDK_T(self, X): + np.random.seed(self.random_state) + random.seed(self.random_state) window_num = int(np.ceil(X.shape[0] / self.width)) featuremap_count = np.zeros((window_num, self.t * self.psi1)) onepoint_matrix = np.full((X.shape[0], self.t), -1) From 172fd80a89df3ec65506967a31fdefcc3f3da495 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 13:46:11 +0000 Subject: [PATCH 16/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index f6a57d46da..b3d03a7ac6 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -66,19 +66,19 @@ class IDK(BaseAnomalyDetector): } def __init__( - self, - psi1, - psi2, - width, - t=100, - sliding = False, - random_state=None, + self, + psi1, + psi2, + width, + t=100, + sliding=False, + random_state=None, ): self.psi1 = psi1 self.psi2 = psi2 self.width = width self.t = t - self.sliding = sliding + self.sliding = sliding self.random_state = random_state super().__init__(axis=0) From 8cef628822eacdcb1e2c7ebba1d09881265f9a36 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:46:42 +0400 Subject: [PATCH 17/42] Update _idk.py to add random_state for test_param --- aeon/anomaly_detection/_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index b3d03a7ac6..e3e9bd4196 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -201,4 +201,5 @@ def _get_test_params(cls, parameter_set="default"): "psi1": 4, "psi2": 2, "width": 1, + "random_state":1, } From 08d5ae8da8eeb36dba690f62d4aaab762f7ee12e Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Thu, 19 Dec 2024 13:47:23 +0000 Subject: [PATCH 18/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index e3e9bd4196..602d38acd1 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -201,5 +201,5 @@ def _get_test_params(cls, parameter_set="default"): "psi1": 4, "psi2": 2, "width": 1, - "random_state":1, + "random_state": 1, } From f78174ed20667ff6cb047de28051594ad4c5efb4 Mon Sep 17 00:00:00 2001 From: Ramana Raja Date: Fri, 20 Dec 2024 16:02:47 +0400 Subject: [PATCH 19/42] test cases and changes have been added as requested by the moderators --- aeon/anomaly_detection/_idk.py | 122 +++++++++++++---------- aeon/anomaly_detection/tests/test_idk.py | 27 +++++ 2 files changed, 99 insertions(+), 50 deletions(-) create mode 100644 aeon/anomaly_detection/tests/test_idk.py diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 678fbebb7e..1e778f5ad7 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -1,6 +1,5 @@ """IDK² and s-IDK² anomaly detector.""" -import random import numpy as np from aeon.anomaly_detection.base import BaseAnomalyDetector @@ -25,16 +24,36 @@ class IDK(BaseAnomalyDetector): Parameters ---------- psi1 : int - Number of samples randomly selected in each iteration for the feature map matrix. + The number of samples randomly selected in each iteration to construct the + feature map matrix during the first stage. This parameter determines the + granularity of the first-stage feature representation. Higher values allow + the model to capture more detailed data characteristics but + increase computational complexity. psi2 : int - Number of samples used for the second-stage feature map embedding. + The number of samples randomly selected in each iteration to construct + the feature map matrix during the second stage. This parameter + determines the granularity of the second-stage feature representation. + Higher values allow the model to capture more detailed + data characteristics but increase computational complexity. width : int - Size of the sliding or fixed-width window for anomaly detection. + The size of the sliding or fixed-width window used for anomaly detection. + For fixed-width processing, this defines the length of each segment analyzed. + In sliding window mode, it specifies the length of the window moving across the data. + Smaller values lead to more localized anomaly detection, while larger values capture + broader trends. t : int, default=100 - Number of iterations (time steps) for random sampling to construct feature maps. + The number of iterations (time steps) for random sampling to construct the feature + maps. Each iteration generates a set of random samples, which contribute to the + feature map matrix. Larger values improve the robustness of the feature maps + but increase the runtime. sliding : bool, default=False - Whether to use a sliding window approach. If True, computes scores for sliding windows; - otherwise, processes fixed-width segments. + Determines whether a sliding window approach is used for anomaly detection. + If True, the model computes scores for overlapping windows across the time series, + providing more detailed anomaly scores at each step. If False, the model processes + the data in fixed-width segments, offering faster computation at the cost of granularity. + rng : np.random.Generator + A NumPy random generator instance to ensure reproducibility and avoid global RNG state changes. + Notes ----- This implementation is inspired by the Isolation Distributional Kernel (IDK) @@ -43,7 +62,7 @@ class IDK(BaseAnomalyDetector): References ---------- - [1]Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. + [1] Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. DOI: https://dl.acm.org/doi/10.1145/3394486.3403062 [2] GitHub Repository: @@ -57,33 +76,34 @@ class IDK(BaseAnomalyDetector): "capability:missing_values": False, } def __init__( - self, - psi1, - psi2, - width, - t=100, - sliding = False, - ): + self, + psi1: int, + psi2: int, + width: int, + t: int = 100, + sliding: bool = False, + rng: np.random.Generator = None, + ) -> None: self.psi1 = psi1 self.psi2 = psi2 self.width = width self.t = t - self.sliding = sliding + self.sliding = sliding + self.rng = rng or np.random.default_rng() super().__init__(axis=0) - def __IK_inne_fm(self,X, psi, t=100): - onepoint_matrix = np.zeros((X.shape[0], (int)(t * psi)), dtype=int) + def _ik_inne_fm(self, X, psi, t=100): + """Compute feature map for the Isolation Distributional Kernel.""" + onepoint_matrix = np.zeros((X.shape[0], t * psi), dtype=int) for time in range(t): - sample_num = psi # - sample_list = [p for p in range(len(X))] - sample_list = random.sample(sample_list, sample_num) - sample = X[sample_list, :] + sample_indices = self.rng.choice(len(X), size=psi, replace=False) + sample = X[sample_indices, :] - tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) # n*psi + tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) tem2 = np.dot(np.ones(X.shape), np.square(sample.T)) - point2sample = tem1 + tem2 - 2 * np.dot(X, sample.T) # n*psi + point2sample = tem1 + tem2 - 2 * np.dot(X, sample.T) - sample2sample = point2sample[sample_list, :] + sample2sample = point2sample[sample_indices, :] row, col = np.diag_indices_from(sample2sample) sample2sample[row, col] = 99999999 radius_list = np.min(sample2sample, axis=1) @@ -96,57 +116,59 @@ def __IK_inne_fm(self,X, psi, t=100): return onepoint_matrix - def __IDK(self,X, psi, t=100): - point_fm_list = self.__IK_inne_fm(X=X, psi=psi, t=t) + def _idk(self, X, psi, t=100): + """Compute anomaly scores using the Isolation Distributional Kernel.""" + point_fm_list = self._ik_inne_fm(X=X, psi=psi, t=t) feature_mean_map = np.mean(point_fm_list, axis=0) return np.dot(point_fm_list, feature_mean_map) / t - def _IDK_T(self,X): + def _idk_t(self, X): + """Fixed-width IDK computation.""" window_num = int(np.ceil(X.shape[0] / self.width)) - featuremap_count = np.zeros((window_num, self.t *self.psi1)) + featuremap_count = np.zeros((window_num, self.t * self.psi1)) onepoint_matrix = np.full((X.shape[0], self.t), -1) for time in range(self.t): - sample_num = self.psi1 - sample_list = [p for p in range(X.shape[0])] - sample_list = random.sample(sample_list, sample_num) - sample = X[sample_list, :] - tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) # n*psi + sample_indices = self.rng.choice(X.shape[0], size=self.psi1, replace=False) + sample = X[sample_indices, :] + tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) tem2 = np.dot(np.ones(X.shape), np.square(sample.T)) - point2sample = tem1 + tem2 - 2 * np.dot(X, sample.T) # n*psi + point2sample = tem1 + tem2 - 2 * np.dot(X, sample.T) - sample2sample = point2sample[sample_list, :] + sample2sample = point2sample[sample_indices, :] row, col = np.diag_indices_from(sample2sample) sample2sample[row, col] = 99999999 radius_list = np.min(sample2sample, axis=1) - min_dist_point2sample = np.argmin(point2sample, axis=1) # index + min_dist_point2sample = np.argmin(point2sample, axis=1) for i in range(X.shape[0]): if point2sample[i][min_dist_point2sample[i]] < radius_list[min_dist_point2sample[i]]: onepoint_matrix[i][time] = min_dist_point2sample[i] + time * self.psi1 featuremap_count[(int)(i / self.width)][onepoint_matrix[i][time]] += 1 - - for i in range((int)(X.shape[0] / self.width)): + for i in range(window_num): featuremap_count[i] /= self.width isextra = X.shape[0] - (int)(X.shape[0] / self.width) * self.width if isextra > 0: featuremap_count[-1] /= isextra - if isextra > 0: featuremap_count = np.delete(featuremap_count, [featuremap_count.shape[0] - 1], axis=0) - return self.__IDK(featuremap_count, psi=self.psi2, t=self.t) - def _IDK_square_sliding(self,X): - point_fm_list = self.__IK_inne_fm(X=X, psi=self.psi1, t=self.t) - point_fm_list=np.insert(point_fm_list, 0, 0, axis=0) - cumsum=np.cumsum(point_fm_list,axis=0) + return self._idk(featuremap_count, psi=self.psi2, t=self.t) + + def _idk_square_sliding(self, X): + """Sliding window IDK computation.""" + point_fm_list = self._ik_inne_fm(X=X, psi=self.psi1, t=self.t) + point_fm_list = np.insert(point_fm_list, 0, 0, axis=0) + cumsum = np.cumsum(point_fm_list, axis=0) + + subsequence_fm_list = (cumsum[self.width:] - cumsum[:-self.width]) / float(self.width) - subsequence_fm_list=(cumsum[self.width:]-cumsum[:-self.width])/float(self.width) + return self._idk(X=subsequence_fm_list, psi=self.psi2, t=self.t) - return self.__IDK(X=subsequence_fm_list, psi=self.psi2, t=self.t) - def _predict(self,X): + def _predict(self, X): + """Predict anomaly scores for input data.""" if self.sliding: - return self._IDK_square_sliding(X) - return self._IDK_T(X) \ No newline at end of file + return self._idk_square_sliding(X) + return self._idk_t(X) \ No newline at end of file diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py new file mode 100644 index 0000000000..93825b6f88 --- /dev/null +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -0,0 +1,27 @@ +import numpy as np + +from aeon.anomaly_detection import IDK +from sklearn.utils import check_random_state + + +def test_idk_univariate(): + """Test IDK on univariate data.""" + rng = check_random_state(seed=2) + series = rng.normal(size=(100,)) + series[50:58] -= 10 + + ad = IDK(psi1=8, psi2=2, width=1, rng=rng) + pred = ad.fit_predict(series) + assert pred.shape == (100,) + assert pred.dtype == np.float64 + assert 50 <= np.argmax(pred) <= 58 +def test_idk_univariate_sliding(): + """Test IDK with sliding on univariate data.""" + rng = check_random_state(seed=2) + series = rng.normal(size=(100,)) + series[50:58] -= 10 + ad = IDK(psi1=16, psi2=4, width=10, sliding=True, rng=rng) + pred = ad.fit_predict(series) + assert pred.shape == (91,) + assert pred.dtype == np.float64 + assert 50 <= np.argmax(pred) <= 68 \ No newline at end of file From d6b1719b6315fc2d3bec6a4f51e943ed2b70b3cf Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 12:06:10 +0000 Subject: [PATCH 20/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 156 +++++++++++++---------- aeon/anomaly_detection/tests/test_idk.py | 6 +- 2 files changed, 90 insertions(+), 72 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 1e778f5ad7..518fe7e8cb 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -1,80 +1,83 @@ """IDK² and s-IDK² anomaly detector.""" import numpy as np + from aeon.anomaly_detection.base import BaseAnomalyDetector + class IDK(BaseAnomalyDetector): """IDK² and s-IDK² anomaly detector. - The Isolation Distributional Kernel (IDK) is a data-dependent kernel for efficient - anomaly detection, improving accuracy without explicit learning. Its extension, - IDK², simplifies group anomaly detection, outperforming traditional methods in - speed and effectiveness. - - .. list-table:: Capabilities - :stub-columns: 1 - - * - Input data format - - univariate - * - Output data format - - anomaly scores - * - Learning Type - - unsupervised - - Parameters - ---------- - psi1 : int - The number of samples randomly selected in each iteration to construct the - feature map matrix during the first stage. This parameter determines the - granularity of the first-stage feature representation. Higher values allow - the model to capture more detailed data characteristics but - increase computational complexity. - psi2 : int - The number of samples randomly selected in each iteration to construct - the feature map matrix during the second stage. This parameter - determines the granularity of the second-stage feature representation. - Higher values allow the model to capture more detailed - data characteristics but increase computational complexity. - width : int - The size of the sliding or fixed-width window used for anomaly detection. - For fixed-width processing, this defines the length of each segment analyzed. - In sliding window mode, it specifies the length of the window moving across the data. - Smaller values lead to more localized anomaly detection, while larger values capture - broader trends. - t : int, default=100 - The number of iterations (time steps) for random sampling to construct the feature - maps. Each iteration generates a set of random samples, which contribute to the - feature map matrix. Larger values improve the robustness of the feature maps - but increase the runtime. - sliding : bool, default=False - Determines whether a sliding window approach is used for anomaly detection. - If True, the model computes scores for overlapping windows across the time series, - providing more detailed anomaly scores at each step. If False, the model processes - the data in fixed-width segments, offering faster computation at the cost of granularity. - rng : np.random.Generator - A NumPy random generator instance to ensure reproducibility and avoid global RNG state changes. - - Notes - ----- - This implementation is inspired by the Isolation Distributional Kernel (IDK) - approach as detailed in [1]_. - The code is adapted from the open-source repository [2]_. - - References - ---------- - [1] Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. - DOI: https://dl.acm.org/doi/10.1145/3394486.3403062 - - [2] GitHub Repository: - IsolationKernel/Codes: IDK Implementation for Time Series Data - URL: https://github.com/IsolationKernel/Codes/tree/main/IDK/TS - """ + The Isolation Distributional Kernel (IDK) is a data-dependent kernel for efficient + anomaly detection, improving accuracy without explicit learning. Its extension, + IDK², simplifies group anomaly detection, outperforming traditional methods in + speed and effectiveness. + + .. list-table:: Capabilities + :stub-columns: 1 + + * - Input data format + - univariate + * - Output data format + - anomaly scores + * - Learning Type + - unsupervised + + Parameters + ---------- + psi1 : int + The number of samples randomly selected in each iteration to construct the + feature map matrix during the first stage. This parameter determines the + granularity of the first-stage feature representation. Higher values allow + the model to capture more detailed data characteristics but + increase computational complexity. + psi2 : int + The number of samples randomly selected in each iteration to construct + the feature map matrix during the second stage. This parameter + determines the granularity of the second-stage feature representation. + Higher values allow the model to capture more detailed + data characteristics but increase computational complexity. + width : int + The size of the sliding or fixed-width window used for anomaly detection. + For fixed-width processing, this defines the length of each segment analyzed. + In sliding window mode, it specifies the length of the window moving across the data. + Smaller values lead to more localized anomaly detection, while larger values capture + broader trends. + t : int, default=100 + The number of iterations (time steps) for random sampling to construct the feature + maps. Each iteration generates a set of random samples, which contribute to the + feature map matrix. Larger values improve the robustness of the feature maps + but increase the runtime. + sliding : bool, default=False + Determines whether a sliding window approach is used for anomaly detection. + If True, the model computes scores for overlapping windows across the time series, + providing more detailed anomaly scores at each step. If False, the model processes + the data in fixed-width segments, offering faster computation at the cost of granularity. + rng : np.random.Generator + A NumPy random generator instance to ensure reproducibility and avoid global RNG state changes. + + Notes + ----- + This implementation is inspired by the Isolation Distributional Kernel (IDK) + approach as detailed in [1]_. + The code is adapted from the open-source repository [2]_. + + References + ---------- + [1] Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly Detection. + DOI: https://dl.acm.org/doi/10.1145/3394486.3403062 + + [2] GitHub Repository: + IsolationKernel/Codes: IDK Implementation for Time Series Data + URL: https://github.com/IsolationKernel/Codes/tree/main/IDK/TS + """ _tags = { "capability:univariate": True, "capability:multivariate": False, "capability:missing_values": False, } + def __init__( self, psi1: int, @@ -110,7 +113,9 @@ def _ik_inne_fm(self, X, psi, t=100): min_point2sample_index = np.argmin(point2sample, axis=1) min_dist_point2sample = min_point2sample_index + time * psi - point2sample_value = point2sample[range(len(onepoint_matrix)), min_point2sample_index] + point2sample_value = point2sample[ + range(len(onepoint_matrix)), min_point2sample_index + ] ind = point2sample_value < radius_list[min_point2sample_index] onepoint_matrix[ind, min_dist_point2sample[ind]] = 1 @@ -143,9 +148,16 @@ def _idk_t(self, X): min_dist_point2sample = np.argmin(point2sample, axis=1) for i in range(X.shape[0]): - if point2sample[i][min_dist_point2sample[i]] < radius_list[min_dist_point2sample[i]]: - onepoint_matrix[i][time] = min_dist_point2sample[i] + time * self.psi1 - featuremap_count[(int)(i / self.width)][onepoint_matrix[i][time]] += 1 + if ( + point2sample[i][min_dist_point2sample[i]] + < radius_list[min_dist_point2sample[i]] + ): + onepoint_matrix[i][time] = ( + min_dist_point2sample[i] + time * self.psi1 + ) + featuremap_count[(int)(i / self.width)][ + onepoint_matrix[i][time] + ] += 1 for i in range(window_num): featuremap_count[i] /= self.width @@ -153,7 +165,9 @@ def _idk_t(self, X): if isextra > 0: featuremap_count[-1] /= isextra if isextra > 0: - featuremap_count = np.delete(featuremap_count, [featuremap_count.shape[0] - 1], axis=0) + featuremap_count = np.delete( + featuremap_count, [featuremap_count.shape[0] - 1], axis=0 + ) return self._idk(featuremap_count, psi=self.psi2, t=self.t) @@ -163,7 +177,9 @@ def _idk_square_sliding(self, X): point_fm_list = np.insert(point_fm_list, 0, 0, axis=0) cumsum = np.cumsum(point_fm_list, axis=0) - subsequence_fm_list = (cumsum[self.width:] - cumsum[:-self.width]) / float(self.width) + subsequence_fm_list = (cumsum[self.width :] - cumsum[: -self.width]) / float( + self.width + ) return self._idk(X=subsequence_fm_list, psi=self.psi2, t=self.t) @@ -171,4 +187,4 @@ def _predict(self, X): """Predict anomaly scores for input data.""" if self.sliding: return self._idk_square_sliding(X) - return self._idk_t(X) \ No newline at end of file + return self._idk_t(X) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 93825b6f88..0318905202 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,7 +1,7 @@ import numpy as np +from sklearn.utils import check_random_state from aeon.anomaly_detection import IDK -from sklearn.utils import check_random_state def test_idk_univariate(): @@ -15,6 +15,8 @@ def test_idk_univariate(): assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 + + def test_idk_univariate_sliding(): """Test IDK with sliding on univariate data.""" rng = check_random_state(seed=2) @@ -24,4 +26,4 @@ def test_idk_univariate_sliding(): pred = ad.fit_predict(series) assert pred.shape == (91,) assert pred.dtype == np.float64 - assert 50 <= np.argmax(pred) <= 68 \ No newline at end of file + assert 50 <= np.argmax(pred) <= 68 From 29f3348e7d456b068663fd84d5f24eec69043f8e Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:08:52 +0400 Subject: [PATCH 21/42] added test_case random state --- aeon/anomaly_detection/_idk.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 518fe7e8cb..c6a91f26bd 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -188,3 +188,23 @@ def _predict(self, X): if self.sliding: return self._idk_square_sliding(X) return self._idk_t(X) + @classmethod + def _get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + return { + "rng": np.random.RandomState(seed=42), + } From 0112c67053cf55575a251cb5bd6a28c8f0b014df Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 12:09:21 +0000 Subject: [PATCH 22/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index c6a91f26bd..6a4c6af18c 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -188,6 +188,7 @@ def _predict(self, X): if self.sliding: return self._idk_square_sliding(X) return self._idk_t(X) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. From 4e1ceab19259884bdaaa7116dbb257c8ea7fe10a Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:15:19 +0400 Subject: [PATCH 23/42] fixed docs --- aeon/anomaly_detection/_idk.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 6a4c6af18c..0aca67c6e4 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -40,21 +40,28 @@ class IDK(BaseAnomalyDetector): width : int The size of the sliding or fixed-width window used for anomaly detection. For fixed-width processing, this defines the length of each segment analyzed. - In sliding window mode, it specifies the length of the window moving across the data. - Smaller values lead to more localized anomaly detection, while larger values capture + In sliding window mode, it specifies the length of the window moving + across the data. + Smaller values lead to more localized anomaly detection, while + larger values capture broader trends. t : int, default=100 - The number of iterations (time steps) for random sampling to construct the feature + The number of iterations (time steps) for random sampling to + construct the feature maps. Each iteration generates a set of random samples, which contribute to the feature map matrix. Larger values improve the robustness of the feature maps but increase the runtime. sliding : bool, default=False Determines whether a sliding window approach is used for anomaly detection. - If True, the model computes scores for overlapping windows across the time series, - providing more detailed anomaly scores at each step. If False, the model processes - the data in fixed-width segments, offering faster computation at the cost of granularity. + If True, the model computes scores for overlapping windows across the + time series, + providing more detailed anomaly scores at each step. If False, the + model processes + the data in fixed-width segments, offering faster computation at the + cost of granularity. rng : np.random.Generator - A NumPy random generator instance to ensure reproducibility and avoid global RNG state changes. + A NumPy random generator instance to ensure reproducibility and avoid + global RNG state changes. Notes ----- From 39d9292da32880056e65fd802e9634f16fb7256d Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 12:15:52 +0000 Subject: [PATCH 24/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 0aca67c6e4..8c57d33b25 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -40,24 +40,24 @@ class IDK(BaseAnomalyDetector): width : int The size of the sliding or fixed-width window used for anomaly detection. For fixed-width processing, this defines the length of each segment analyzed. - In sliding window mode, it specifies the length of the window moving + In sliding window mode, it specifies the length of the window moving across the data. - Smaller values lead to more localized anomaly detection, while + Smaller values lead to more localized anomaly detection, while larger values capture broader trends. t : int, default=100 - The number of iterations (time steps) for random sampling to + The number of iterations (time steps) for random sampling to construct the feature maps. Each iteration generates a set of random samples, which contribute to the feature map matrix. Larger values improve the robustness of the feature maps but increase the runtime. sliding : bool, default=False Determines whether a sliding window approach is used for anomaly detection. - If True, the model computes scores for overlapping windows across the + If True, the model computes scores for overlapping windows across the time series, - providing more detailed anomaly scores at each step. If False, the + providing more detailed anomaly scores at each step. If False, the model processes - the data in fixed-width segments, offering faster computation at the + the data in fixed-width segments, offering faster computation at the cost of granularity. rng : np.random.Generator A NumPy random generator instance to ensure reproducibility and avoid From 4711ede74aedbc121836395e8acf9752c9dd0d14 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:24:17 +0400 Subject: [PATCH 25/42] Updated docs --- aeon/anomaly_detection/_idk.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 8c57d33b25..8587466cea 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -103,7 +103,6 @@ def __init__( super().__init__(axis=0) def _ik_inne_fm(self, X, psi, t=100): - """Compute feature map for the Isolation Distributional Kernel.""" onepoint_matrix = np.zeros((X.shape[0], t * psi), dtype=int) for time in range(t): sample_indices = self.rng.choice(len(X), size=psi, replace=False) @@ -129,13 +128,11 @@ def _ik_inne_fm(self, X, psi, t=100): return onepoint_matrix def _idk(self, X, psi, t=100): - """Compute anomaly scores using the Isolation Distributional Kernel.""" point_fm_list = self._ik_inne_fm(X=X, psi=psi, t=t) feature_mean_map = np.mean(point_fm_list, axis=0) return np.dot(point_fm_list, feature_mean_map) / t def _idk_t(self, X): - """Fixed-width IDK computation.""" window_num = int(np.ceil(X.shape[0] / self.width)) featuremap_count = np.zeros((window_num, self.t * self.psi1)) onepoint_matrix = np.full((X.shape[0], self.t), -1) @@ -179,7 +176,6 @@ def _idk_t(self, X): return self._idk(featuremap_count, psi=self.psi2, t=self.t) def _idk_square_sliding(self, X): - """Sliding window IDK computation.""" point_fm_list = self._ik_inne_fm(X=X, psi=self.psi1, t=self.t) point_fm_list = np.insert(point_fm_list, 0, 0, axis=0) cumsum = np.cumsum(point_fm_list, axis=0) @@ -191,7 +187,6 @@ def _idk_square_sliding(self, X): return self._idk(X=subsequence_fm_list, psi=self.psi2, t=self.t) def _predict(self, X): - """Predict anomaly scores for input data.""" if self.sliding: return self._idk_square_sliding(X) return self._idk_t(X) @@ -214,5 +209,8 @@ def _get_test_params(cls, parameter_set="default"): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ return { + "psi1":8, + "psi2":2, + "width":1, "rng": np.random.RandomState(seed=42), } From 2c11c688c02b2b4c0f9a42d173ff2b4d71af2809 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 12:24:50 +0000 Subject: [PATCH 26/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 8587466cea..c7e1bf7362 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -209,8 +209,8 @@ def _get_test_params(cls, parameter_set="default"): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ return { - "psi1":8, - "psi2":2, - "width":1, + "psi1": 8, + "psi2": 2, + "width": 1, "rng": np.random.RandomState(seed=42), } From 8383533bfbf800180b41f5faa7f5526b866126ae Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:27:39 +0400 Subject: [PATCH 27/42] Updated docs for test case --- aeon/anomaly_detection/tests/test_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 0318905202..f947151006 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,3 +1,4 @@ +"""Tests for the IDK class.""" import numpy as np from sklearn.utils import check_random_state From fc28ef3f847f5ec13d8e2206d735ac30d8bb442d Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Fri, 20 Dec 2024 12:28:08 +0000 Subject: [PATCH 28/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index f947151006..1e0776bc88 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,4 +1,5 @@ """Tests for the IDK class.""" + import numpy as np from sklearn.utils import check_random_state From 270e0e103aa41da7f72682aee1d5be6d89697bea Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 21 Dec 2024 23:39:27 +0400 Subject: [PATCH 29/42] Updated test_idk.py --- aeon/anomaly_detection/tests/test_idk.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 1e0776bc88..119fb566b6 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,11 +1,13 @@ -"""Tests for the IDK class.""" - import numpy as np -from sklearn.utils import check_random_state - +import pytest from aeon.anomaly_detection import IDK +from aeon.utils.validation._dependencies import _check_estimator_deps +from sklearn.utils import check_random_state - +@pytest.mark.skipif( + not _check_estimator_deps(IDK, severity="none"), + reason="skip test if required soft dependencies not available", +) def test_idk_univariate(): """Test IDK on univariate data.""" rng = check_random_state(seed=2) @@ -17,8 +19,10 @@ def test_idk_univariate(): assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 - - +@pytest.mark.skipif( + not _check_estimator_deps(IDK, severity="none"), + reason="skip test if required soft dependencies not available", +) def test_idk_univariate_sliding(): """Test IDK with sliding on univariate data.""" rng = check_random_state(seed=2) From 0319b5eed6ef05559dea687e0fe29829200064ae Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 21 Dec 2024 19:40:03 +0000 Subject: [PATCH 30/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 119fb566b6..11c7c818c6 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,8 +1,10 @@ import numpy as np import pytest +from sklearn.utils import check_random_state + from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps -from sklearn.utils import check_random_state + @pytest.mark.skipif( not _check_estimator_deps(IDK, severity="none"), @@ -19,6 +21,8 @@ def test_idk_univariate(): assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 + + @pytest.mark.skipif( not _check_estimator_deps(IDK, severity="none"), reason="skip test if required soft dependencies not available", From 4bb39a4390b4e9f964518bf5f65d046bb3209371 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sat, 21 Dec 2024 23:43:49 +0400 Subject: [PATCH 31/42] Updated test_idk.py to add docs --- aeon/anomaly_detection/tests/test_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 11c7c818c6..24a3542f80 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,3 +1,4 @@ +"""Tests for the IDK Class.""" import numpy as np import pytest from sklearn.utils import check_random_state From e4b51d2c850d1192d788721769ef936550e35b7d Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sat, 21 Dec 2024 19:44:18 +0000 Subject: [PATCH 32/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 24a3542f80..6d7d9ffe66 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,4 +1,5 @@ """Tests for the IDK Class.""" + import numpy as np import pytest from sklearn.utils import check_random_state From e5d9585cde3df1031481ff2bd61ba2b4cb3efbb2 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 31 Dec 2024 03:04:54 +0400 Subject: [PATCH 33/42] updated random_state --- aeon/anomaly_detection/_idk.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index c7e1bf7362..58eb206c07 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -1,5 +1,3 @@ -"""IDK² and s-IDK² anomaly detector.""" - import numpy as np from aeon.anomaly_detection.base import BaseAnomalyDetector @@ -59,9 +57,7 @@ class IDK(BaseAnomalyDetector): model processes the data in fixed-width segments, offering faster computation at the cost of granularity. - rng : np.random.Generator - A NumPy random generator instance to ensure reproducibility and avoid - global RNG state changes. + random_state : int, Random state or None, default=None Notes ----- @@ -92,20 +88,20 @@ def __init__( width: int, t: int = 100, sliding: bool = False, - rng: np.random.Generator = None, + random_state: int = None, ) -> None: self.psi1 = psi1 self.psi2 = psi2 self.width = width self.t = t self.sliding = sliding - self.rng = rng or np.random.default_rng() + self.random_state = np.random.default_rng(random_state) super().__init__(axis=0) def _ik_inne_fm(self, X, psi, t=100): onepoint_matrix = np.zeros((X.shape[0], t * psi), dtype=int) for time in range(t): - sample_indices = self.rng.choice(len(X), size=psi, replace=False) + sample_indices = self.random_state.choice(len(X), size=psi, replace=False) sample = X[sample_indices, :] tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) @@ -138,7 +134,7 @@ def _idk_t(self, X): onepoint_matrix = np.full((X.shape[0], self.t), -1) for time in range(self.t): - sample_indices = self.rng.choice(X.shape[0], size=self.psi1, replace=False) + sample_indices = self.random_state.choice(X.shape[0], size=self.psi1, replace=False) sample = X[sample_indices, :] tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) tem2 = np.dot(np.ones(X.shape), np.square(sample.T)) @@ -212,5 +208,5 @@ def _get_test_params(cls, parameter_set="default"): "psi1": 8, "psi2": 2, "width": 1, - "rng": np.random.RandomState(seed=42), + "random_state": 42, } From 316a5d1bd6b5482e2b106a0fcca60611c0583775 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Mon, 30 Dec 2024 23:05:35 +0000 Subject: [PATCH 34/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/_idk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/_idk.py b/aeon/anomaly_detection/_idk.py index 58eb206c07..344897abae 100644 --- a/aeon/anomaly_detection/_idk.py +++ b/aeon/anomaly_detection/_idk.py @@ -134,7 +134,9 @@ def _idk_t(self, X): onepoint_matrix = np.full((X.shape[0], self.t), -1) for time in range(self.t): - sample_indices = self.random_state.choice(X.shape[0], size=self.psi1, replace=False) + sample_indices = self.random_state.choice( + X.shape[0], size=self.psi1, replace=False + ) sample = X[sample_indices, :] tem1 = np.dot(np.square(X), np.ones(sample.T.shape)) tem2 = np.dot(np.ones(X.shape), np.square(sample.T)) From cb7992e579b8e9ce61bd970856a7dac13c6db53a Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 31 Dec 2024 03:12:02 +0400 Subject: [PATCH 35/42] Updated test.py --- aeon/anomaly_detection/tests/test_idk.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 6d7d9ffe66..1e779ea18b 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -2,11 +2,9 @@ import numpy as np import pytest -from sklearn.utils import check_random_state - from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps - +from sklearn.utils import check_random_state @pytest.mark.skipif( not _check_estimator_deps(IDK, severity="none"), @@ -18,13 +16,11 @@ def test_idk_univariate(): series = rng.normal(size=(100,)) series[50:58] -= 10 - ad = IDK(psi1=8, psi2=2, width=1, rng=rng) + ad = IDK(psi1=8, psi2=2, width=1, random_state=42) pred = ad.fit_predict(series) assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 - - @pytest.mark.skipif( not _check_estimator_deps(IDK, severity="none"), reason="skip test if required soft dependencies not available", @@ -34,7 +30,7 @@ def test_idk_univariate_sliding(): rng = check_random_state(seed=2) series = rng.normal(size=(100,)) series[50:58] -= 10 - ad = IDK(psi1=16, psi2=4, width=10, sliding=True, rng=rng) + ad = IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1) pred = ad.fit_predict(series) assert pred.shape == (91,) assert pred.dtype == np.float64 From b319624cf38a6478d1758fbcef85f470f3de853d Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Mon, 30 Dec 2024 23:12:30 +0000 Subject: [PATCH 36/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 1e779ea18b..419753c5b4 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -2,9 +2,11 @@ import numpy as np import pytest +from sklearn.utils import check_random_state + from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps -from sklearn.utils import check_random_state + @pytest.mark.skipif( not _check_estimator_deps(IDK, severity="none"), @@ -21,6 +23,8 @@ def test_idk_univariate(): assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 + + @pytest.mark.skipif( not _check_estimator_deps(IDK, severity="none"), reason="skip test if required soft dependencies not available", From 3cbc709914bd203fcedd41b123d1aeb5c8445fd5 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 31 Dec 2024 21:11:29 +0400 Subject: [PATCH 37/42] Updated test_idk.py --- aeon/anomaly_detection/tests/test_idk.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 419753c5b4..e104886509 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -2,14 +2,12 @@ import numpy as np import pytest -from sklearn.utils import check_random_state - from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps - +from sklearn.utils import check_random_state @pytest.mark.skipif( - not _check_estimator_deps(IDK, severity="none"), + not _check_estimator_deps(IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none"), reason="skip test if required soft dependencies not available", ) def test_idk_univariate(): @@ -23,10 +21,8 @@ def test_idk_univariate(): assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 - - @pytest.mark.skipif( - not _check_estimator_deps(IDK, severity="none"), + not _check_estimator_deps(IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1), severity="none"), reason="skip test if required soft dependencies not available", ) def test_idk_univariate_sliding(): From cac70c76d0836bd9d28a8347e820d00f30c09e41 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 31 Dec 2024 17:12:05 +0000 Subject: [PATCH 38/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index e104886509..438b15c995 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -2,12 +2,16 @@ import numpy as np import pytest +from sklearn.utils import check_random_state + from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps -from sklearn.utils import check_random_state + @pytest.mark.skipif( - not _check_estimator_deps(IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none"), + not _check_estimator_deps( + IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none" + ), reason="skip test if required soft dependencies not available", ) def test_idk_univariate(): @@ -21,8 +25,12 @@ def test_idk_univariate(): assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 + + @pytest.mark.skipif( - not _check_estimator_deps(IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1), severity="none"), + not _check_estimator_deps( + IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1), severity="none" + ), reason="skip test if required soft dependencies not available", ) def test_idk_univariate_sliding(): From a2f4bf0d6ff1355b2acada598b9d04b637ab294e Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Tue, 31 Dec 2024 21:17:29 +0400 Subject: [PATCH 39/42] Updated test_idk.py to make sliding and non sliding into 1 --- aeon/anomaly_detection/tests/test_idk.py | 31 ++++++------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 438b15c995..178c0167b0 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -2,16 +2,12 @@ import numpy as np import pytest -from sklearn.utils import check_random_state - from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps - +from sklearn.utils import check_random_state @pytest.mark.skipif( - not _check_estimator_deps( - IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none" - ), + not _check_estimator_deps(IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none"), reason="skip test if required soft dependencies not available", ) def test_idk_univariate(): @@ -22,24 +18,11 @@ def test_idk_univariate(): ad = IDK(psi1=8, psi2=2, width=1, random_state=42) pred = ad.fit_predict(series) + ad_sliding = IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1) + pred_sliding = ad_sliding.fit_predict(series) assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 - - -@pytest.mark.skipif( - not _check_estimator_deps( - IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1), severity="none" - ), - reason="skip test if required soft dependencies not available", -) -def test_idk_univariate_sliding(): - """Test IDK with sliding on univariate data.""" - rng = check_random_state(seed=2) - series = rng.normal(size=(100,)) - series[50:58] -= 10 - ad = IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1) - pred = ad.fit_predict(series) - assert pred.shape == (91,) - assert pred.dtype == np.float64 - assert 50 <= np.argmax(pred) <= 68 + assert pred_sliding.shape == (91,) + assert pred_sliding.dtype == np.float64 + assert 50 <= np.argmax(pred_sliding) <= 68 From 1c4262b7eab6e62a6ab08a7627cc25d01cbb0c30 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Tue, 31 Dec 2024 17:18:08 +0000 Subject: [PATCH 40/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 178c0167b0..d61bd163d6 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -2,12 +2,16 @@ import numpy as np import pytest +from sklearn.utils import check_random_state + from aeon.anomaly_detection import IDK from aeon.utils.validation._dependencies import _check_estimator_deps -from sklearn.utils import check_random_state + @pytest.mark.skipif( - not _check_estimator_deps(IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none"), + not _check_estimator_deps( + IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none" + ), reason="skip test if required soft dependencies not available", ) def test_idk_univariate(): From f91793b99f87ece95a9f056fcf4992d105da1540 Mon Sep 17 00:00:00 2001 From: Ramana Raja <83065061+Ramana-Raja@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:59:56 +0400 Subject: [PATCH 41/42] Updated test_idk.py --- aeon/anomaly_detection/tests/test_idk.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index d61bd163d6..69a3d74f49 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -1,32 +1,23 @@ """Tests for the IDK Class.""" import numpy as np -import pytest -from sklearn.utils import check_random_state from aeon.anomaly_detection import IDK -from aeon.utils.validation._dependencies import _check_estimator_deps - -@pytest.mark.skipif( - not _check_estimator_deps( - IDK(psi1=8, psi2=2, width=1, random_state=42), severity="none" - ), - reason="skip test if required soft dependencies not available", -) def test_idk_univariate(): """Test IDK on univariate data.""" - rng = check_random_state(seed=2) + rng = np.random.default_rng(seed=2) series = rng.normal(size=(100,)) - series[50:58] -= 10 + series[50:58] -= 5 - ad = IDK(psi1=8, psi2=2, width=1, random_state=42) + ad = IDK(psi1=8, psi2=2, width=1, random_state=2) pred = ad.fit_predict(series) ad_sliding = IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1) pred_sliding = ad_sliding.fit_predict(series) + assert pred.shape == (100,) assert pred.dtype == np.float64 assert 50 <= np.argmax(pred) <= 58 assert pred_sliding.shape == (91,) assert pred_sliding.dtype == np.float64 - assert 50 <= np.argmax(pred_sliding) <= 68 + assert 60 <= np.argmax(pred_sliding) <= 80 From 81b0f5b8f00748b8bb60138a9b06d651323eacfb Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sun, 5 Jan 2025 15:00:36 +0000 Subject: [PATCH 42/42] Automatic `pre-commit` fixes --- aeon/anomaly_detection/tests/test_idk.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/tests/test_idk.py b/aeon/anomaly_detection/tests/test_idk.py index 69a3d74f49..791d1def8f 100644 --- a/aeon/anomaly_detection/tests/test_idk.py +++ b/aeon/anomaly_detection/tests/test_idk.py @@ -4,9 +4,10 @@ from aeon.anomaly_detection import IDK + def test_idk_univariate(): """Test IDK on univariate data.""" - rng = np.random.default_rng(seed=2) + rng = np.random.default_rng(seed=2) series = rng.normal(size=(100,)) series[50:58] -= 5