Merge branch 'main' into lof

aeon-toolkit · Nov 5, 2024 · feabd22 · feabd22
2 parents 8c74b0e + 23f42d3
commit feabd22
Show file tree

Hide file tree

Showing 345 changed files with 10,337 additions and 10,666 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -2592,6 +2592,15 @@
       "contributions": [
         "code"
       ]
+    },
+    {
+      "login": "emmanuel-ferdman",
+      "name": "Emmanuel Ferdman",
+      "avatar_url": "https://avatars.githubusercontent.com/u/35470921?v=4",
+      "profile": "https://github.com/emmanuel-ferdman",
+      "contributions": [
+        "doc"
+      ]
     }
   ],
   "commitType": "docs"

diff --git a/.codecov.yml b/.codecov.yml
@@ -25,9 +25,6 @@ github_checks:
 ignore:
   - ".github/"
   - ".binder/"
-  - "aeon/testing/expected_results/"
+  - "aeon/testing/expected_results/results_reproduction/"
   - "docs/"
   - "examples/"
-  - "extension_templates/"
-  # specific files
-  - "aeon/registry/_tag_deprecation.py"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,13 +29,13 @@ repos:
         args: [ "--create", "--python-folders", "aeon" ]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.2
     hooks:
       - id: ruff
         args: [ "--fix"]
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.18.0
+    rev: v3.19.0
     hooks:
       - id: pyupgrade
         args: [ "--py39-plus" ]

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
diff --git a/README.md b/README.md
@@ -55,14 +55,13 @@ can be found [here](https://www.aeon-toolkit.org/en/stable/developer_guide/dev_i
 
 The best place to get started for all `aeon` packages is our [getting started guide](https://www.aeon-toolkit.org/en/stable/getting_started.html).
 
-Below we provide a quick example of how to use `aeon` for forecasting,
-classification and clustering.
+Below we provide a quick example of how to use `aeon` for classification and clustering.
 
 ### Classification
 
 *It's worth mentioning that the classifier used in the example can easily be
 swapped out for a regressor, and the labels for numeric targets. This flexibility
-allowing for seamless adaptation to different tasks and datasets while preserving
+allows for seamless adaptation to different tasks and datasets while preserving
 API consistency.*
 
 ```python

diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py
@@ -1,6 +1,8 @@
 """Time Series Anomaly Detection."""
 
 __all__ = [
+    "CBLOF",
+    "COPOD",
     "DWT_MLEAD",
     "IsolationForest",
     "KMeansAD",
@@ -12,7 +14,8 @@
     "STRAY",
 ]
 
-
+from aeon.anomaly_detection._cblof import CBLOF
+from aeon.anomaly_detection._copod import COPOD
 from aeon.anomaly_detection._dwt_mlead import DWT_MLEAD
 from aeon.anomaly_detection._iforest import IsolationForest
 from aeon.anomaly_detection._kmeans import KMeansAD

diff --git a/aeon/anomaly_detection/_cblof.py b/aeon/anomaly_detection/_cblof.py
@@ -0,0 +1,162 @@
+"""CBLOF for Anomaly Detection."""
+
+__maintainer__ = []
+__all__ = ["CBLOF"]
+
+from typing import Optional, Union
+
+import numpy as np
+
+from aeon.anomaly_detection._pyodadapter import PyODAdapter
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+class CBLOF(PyODAdapter):
+    r"""CBLOF for Anomaly Detection.
+
+    This class implements the CBLOF algorithm for anomaly detection
+    using PyODAdapter to be used in the aeon framework. All parameters are passed to
+    the PyOD model ``CBLOF`` except for `window_size` and `stride`, which are used to
+    construct the sliding windows.
+
+    .. list-table:: Capabilities
+       :stub-columns: 1
+
+       * - Input data format
+         - univariate and multivariate
+       * - Output data format
+         - anomaly scores
+       * - Learning Type
+         - unsupervised or semi-supervised
+
+    The documentation for parameters has been adapted from the
+    [PyOD documentation](https://pyod.readthedocs.io/en/latest/pyod.models.html#id117).
+    Here, `X` refers to the set of sliding windows extracted from the time series
+    using :func:`aeon.utils.windowing.sliding_windows` with the parameters
+    ``window_size`` and ``stride``. The internal `X` has the shape
+    `(n_windows, window_size * n_channels)`.
+
+    Parameters
+    ----------
+    n_clusters : int, default=8
+        The number of clusters to form as well as the number of
+        centroids to generate.
+
+    clustering_estimator : Estimator or None, default=None
+        The base clustering algorithm for performing data clustering.
+        A valid clustering algorithm should be passed in. The estimator should
+        have standard sklearn APIs, fit() and predict(). The estimator should
+        have attributes ``labels_`` and ``cluster_centers_``.
+        If ``cluster_centers_`` is not in the attributes once the model is fit,
+        it is calculated as the mean of the samples in a cluster.
+
+        If not set, CBLOF uses KMeans for scalability. See
+        https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
+
+        aeon clustering estimators are not supported.
+
+    alpha : float in (0.5, 1), default=0.9
+        Coefficient for deciding small and large clusters. The ratio
+        of the number of samples in large clusters to the number of samples in
+        small clusters.
+
+    beta : int or float in (1,), default=5
+        Coefficient for deciding small and large clusters. For a list of
+        clusters sorted by size `|C1|, |C2|, ..., |Cn|, beta = |Ck|/|Ck-1|`.
+
+    use_weights : bool, default=False
+        If set to True, the sizes of the clusters are used as weights in
+        outlier score calculation.
+
+    check_estimator : bool, default=False
+        If set to True, check whether the base estimator is consistent with
+        sklearn standard.
+
+    random_state : int, np.random.RandomState or None, default=None
+        If int, random_state is the seed used by the random
+        number generator; If RandomState instance, random_state is the random
+        number generator; If None, the random number generator is the
+        RandomState instance used by `np.random`.
+
+    window_size : int, default=10
+        Size of the sliding window.
+
+    stride : int, default=1
+        Stride of the sliding window.
+    """
+
+    _tags = {
+        "capability:multivariate": True,
+        "capability:univariate": True,
+        "capability:missing_values": False,
+        "fit_is_empty": False,
+        "python_dependencies": ["pyod"],
+    }
+
+    def __init__(
+        self,
+        n_clusters: int = 8,
+        clustering_estimator=None,
+        alpha: float = 0.9,
+        beta: Union[int, float] = 5,
+        use_weights: bool = False,
+        check_estimator: bool = False,
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
+        window_size: int = 10,
+        stride: int = 1,
+    ):
+        _check_soft_dependencies(*self._tags["python_dependencies"])
+        from pyod.models.cblof import CBLOF
+
+        model = CBLOF(
+            n_clusters=n_clusters,
+            clustering_estimator=clustering_estimator,
+            alpha=alpha,
+            beta=beta,
+            use_weights=use_weights,
+            check_estimator=check_estimator,
+            random_state=random_state,
+        )
+        self.n_clusters = n_clusters
+        self.clustering_estimator = clustering_estimator
+        self.alpha = alpha
+        self.beta = beta
+        self.use_weights = use_weights
+        self.check_estimator = check_estimator
+        self.random_state = random_state
+        super().__init__(model, window_size, stride)
+
+    def _fit(self, X: np.ndarray, y: Union[np.ndarray, None] = None) -> None:
+        super()._fit(X, y)
+
+    def _predict(self, X: np.ndarray) -> np.ndarray:
+        return super()._predict(X)
+
+    def _fit_predict(
+        self, X: np.ndarray, y: Union[np.ndarray, None] = None
+    ) -> np.ndarray:
+        return super()._fit_predict(X, y)
+
+    @classmethod
+    def _get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+
+        Returns
+        -------
+        params : dict
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+            `create_test_instance` uses the first (or only) dictionary in `params`.
+        """
+        return {
+            "n_clusters": 4,
+            "alpha": 0.75,
+            "beta": 3,
+        }
diff --git a/aeon/anomaly_detection/_copod.py b/aeon/anomaly_detection/_copod.py
@@ -0,0 +1,87 @@
+"""COPOD for anomaly detection."""
+
+__maintainer__ = []
+__all__ = ["COPOD"]
+
+from typing import Union
+
+import numpy as np
+
+from aeon.anomaly_detection._pyodadapter import PyODAdapter
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+class COPOD(PyODAdapter):
+    """COPOD for anomaly detection.
+
+    This class implements COPOD using PyODAdapter to be used in the aeon framework.
+    The parameter `n_jobs` is passed to the COPOD model from PyOD; `window_size` and
+    `stride` are used to construct the sliding windows.
+
+    .. list-table:: Capabilities
+       :stub-columns: 1
+       * - Input data format
+         - univariate and multivariate
+       * - Output data format
+         - anomaly scores
+       * - Learning Type
+         - unsupervised or semi-supervised
+
+    Parameters
+    ----------
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for the COPOD model.
+
+    window_size : int, default=10
+        Size of the sliding window.
+
+    stride : int, default=1
+        Stride of the sliding window.
+    """
+
+    _tags = {
+        "capability:multivariate": True,
+        "capability:univariate": True,
+        "capability:missing_values": False,
+        "fit_is_empty": False,
+        "python_dependencies": ["pyod"],
+    }
+
+    def __init__(self, n_jobs: int = 1, window_size: int = 10, stride: int = 1):
+        _check_soft_dependencies(*self._tags["python_dependencies"])
+        from pyod.models.copod import COPOD
+
+        model = COPOD(n_jobs=n_jobs)
+        self.n_jobs = n_jobs
+        super().__init__(model, window_size=window_size, stride=stride)
+
+    def _fit(self, X: np.ndarray, y: Union[np.ndarray, None] = None) -> None:
+        super()._fit(X, y)
+
+    def _predict(self, X: np.ndarray) -> np.ndarray:
+        return super()._predict(X)
+
+    def _fit_predict(
+        self, X: np.ndarray, y: Union[np.ndarray, None] = None
+    ) -> np.ndarray:
+        return super()._fit_predict(X, y)
+
+    @classmethod
+    def _get_test_params(cls, parameter_set="default") -> dict:
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+            `create_test_instance` uses the first (or only) dictionary in `params`.
+        """
+        return {}
diff --git a/aeon/anomaly_detection/_dwt_mlead.py b/aeon/anomaly_detection/_dwt_mlead.py
@@ -236,7 +236,7 @@ def _push_anomaly_counts_down_to_points(
         return counter[:n]
 
     @classmethod
-    def get_test_params(cls, parameter_set="default"):
+    def _get_test_params(cls, parameter_set="default"):
         """Return testing parameter settings for the estimator.
 
         Only supports 'default'-parameter set.
@@ -253,7 +253,6 @@ def get_test_params(cls, parameter_set="default"):
             Parameters to create testing instances of the class.
             Each dict are parameters to construct an "interesting" test instance, i.e.,
             `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
-            `create_test_instance` uses the first (or only) dictionary in `params`.
         """
         return {
             "start_level": 2,

diff --git a/aeon/anomaly_detection/_iforest.py b/aeon/anomaly_detection/_iforest.py
@@ -137,7 +137,7 @@ def _fit_predict(
         return super()._fit_predict(X, y)
 
     @classmethod
-    def get_test_params(cls, parameter_set="default"):
+    def _get_test_params(cls, parameter_set="default"):
         """Return testing parameter settings for the estimator.
 
         Parameters
@@ -152,7 +152,6 @@ def get_test_params(cls, parameter_set="default"):
             Parameters to create testing instances of the class.
             Each dict are parameters to construct an "interesting" test instance, i.e.,
             `IsolationForest(**params)` creates a valid test instance.
-            `create_test_instance` uses the first (or only) dictionary in `params`.
         """
         return {
             "n_estimators": 10,

diff --git a/aeon/anomaly_detection/_kmeans.py b/aeon/anomaly_detection/_kmeans.py
@@ -171,7 +171,7 @@ def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray:
         return point_anomaly_scores
 
     @classmethod
-    def get_test_params(cls, parameter_set="default"):
+    def _get_test_params(cls, parameter_set="default"):
         """Return testing parameter settings for the estimator.
 
         Parameters
@@ -186,7 +186,6 @@ def get_test_params(cls, parameter_set="default"):
             Parameters to create testing instances of the class.
             Each dict are parameters to construct an "interesting" test instance, i.e.,
             `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
-            `create_test_instance` uses the first (or only) dictionary in `params`.
         """
         return {
             "n_clusters": 5,

diff --git a/aeon/anomaly_detection/_merlin.py b/aeon/anomaly_detection/_merlin.py
@@ -208,7 +208,7 @@ def _drag(X, length, discord_range):
         return C[d_max], np.sqrt(D[d_max])
 
     @classmethod
-    def get_test_params(cls, parameter_set="default"):
+    def _get_test_params(cls, parameter_set="default"):
         """Return testing parameter settings for the estimator.
 
         Parameters
@@ -223,6 +223,5 @@ def get_test_params(cls, parameter_set="default"):
             Parameters to create testing instances of the class.
             Each dict are parameters to construct an "interesting" test instance, i.e.,
             `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
-            `create_test_instance` uses the first (or only) dictionary in `params`.
         """
         return {"min_length": 4, "max_length": 7}