Skip to content

Commit 224cdc1

Browse files
aryanpolaSebastian SchmidlSebastianSchmidl
authored
[ENH] Implementation of LOF using PyODAdapter (#2209)
* Implementation of LOF using PyODAdapter * Automatic `pre-commit` fixes * window size > stride in params * NotFittederror * Automatic `pre-commit` fixes * return self * Automatic `pre-commit` fixes * test to check params * Adding a value to window_size if not provided * Update aeon/anomaly_detection/_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Changes in test, 1 failing * Automatic `pre-commit` fixes * pyod_model as parameter * unfinished changes * Update aeon/anomaly_detection/_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Update aeon/anomaly_detection/_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Update aeon/anomaly_detection/_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Update aeon/anomaly_detection/_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * working model * Automatic `pre-commit` fixes * merge conflicts * . * Automatic `pre-commit` fixes * git conflicts * Automatic `pre-commit` fixes * git conflicts * test * Automatic `pre-commit` fixes * test2 * test3 * Automatic `pre-commit` fixes * . * fixes * fixed minor mistakes * Update aeon/anomaly_detection/tests/test_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Update aeon/anomaly_detection/tests/test_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Update aeon/anomaly_detection/tests/test_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Update aeon/anomaly_detection/_lof.py Co-authored-by: Sebastian Schmidl <[email protected]> * Changes in test_lof * removed anomaly range stuff * removed unnecessary in test_lof_default * checking for anomalies --------- Co-authored-by: aryanpola <[email protected]> Co-authored-by: Sebastian Schmidl <[email protected]> Co-authored-by: Sebastian Schmidl <[email protected]>
1 parent 2fcfe11 commit 224cdc1

File tree

4 files changed

+388
-6
lines changed

4 files changed

+388
-6
lines changed

aeon/anomaly_detection/__init__.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
"""Time Series Anomaly Detection."""
22

33
__all__ = [
4+
"CBLOF",
5+
"COPOD",
46
"DWT_MLEAD",
7+
"IsolationForest",
58
"KMeansAD",
9+
"LeftSTAMPi",
10+
"LOF",
611
"MERLIN",
7-
"STRAY",
12+
"OneClassSVM",
813
"PyODAdapter",
914
"STOMP",
10-
"LeftSTAMPi",
11-
"IsolationForest",
12-
"CBLOF",
13-
"COPOD",
14-
"OneClassSVM",
15+
"STRAY",
1516
]
1617

1718
from aeon.anomaly_detection._cblof import CBLOF
@@ -20,6 +21,7 @@
2021
from aeon.anomaly_detection._iforest import IsolationForest
2122
from aeon.anomaly_detection._kmeans import KMeansAD
2223
from aeon.anomaly_detection._left_stampi import LeftSTAMPi
24+
from aeon.anomaly_detection._lof import LOF
2325
from aeon.anomaly_detection._merlin import MERLIN
2426
from aeon.anomaly_detection._one_class_svm import OneClassSVM
2527
from aeon.anomaly_detection._pyodadapter import PyODAdapter

aeon/anomaly_detection/_lof.py

+169
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""Local Outlier Factor (LOF) algorithm for anomaly detection."""
2+
3+
__maintainer__ = []
4+
__all__ = ["LOF"]
5+
6+
from typing import Optional, Union
7+
8+
import numpy as np
9+
10+
from aeon.anomaly_detection._pyodadapter import PyODAdapter
11+
from aeon.utils.validation._dependencies import _check_soft_dependencies
12+
13+
14+
class LOF(PyODAdapter):
15+
"""Local Outlier Factor (LOF) algorithm for anomaly detection.
16+
17+
This class implement metrics-based outlier detection algorithms using the
18+
Local Outlier Factor (LOF) algorithm from PyOD.
19+
20+
.. list-table:: Capabilities
21+
:stub-columns: 1
22+
23+
* - Input data format
24+
- univariate or multivariate
25+
* - Output data format
26+
- anomaly scores
27+
* - missing_values
28+
- False
29+
* - Learning Type
30+
- unsupervised or semi-supervised
31+
* - python_dependencies
32+
- ["pyod"]
33+
34+
The documentation for parameters has been adapted from the
35+
[PyOD documentation](https://pyod.readthedocs.io/en/latest/pyod.models.html#id586).
36+
Here, `X` refers to the set of sliding windows extracted from the time series
37+
using :func:`aeon.utils.windowing.sliding_windows` with the parameters
38+
``window_size`` and ``stride``. The internal `X` has the shape
39+
`(n_windows, window_size * n_channels)`.
40+
41+
Parameters
42+
----------
43+
n_neighbors : int, optional (default=20)
44+
Number of neighbors to use by default for `kneighbors` queries.
45+
If n_neighbors is larger than the number of samples provided,
46+
all samples will be used.
47+
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
48+
Algorithm used to compute the nearest neighbors:
49+
- 'ball_tree' will use BallTree
50+
- 'kd_tree' will use KDTree
51+
- 'brute' will use a brute-force search.
52+
- 'auto' will attempt to decide the most appropriate algorithm
53+
based on the values passed to :meth:`fit` method.
54+
Note: fitting on sparse input will override the setting of
55+
this parameter, using brute force.
56+
leaf_size : int, optional (default=30)
57+
Leaf size passed to `BallTree` or `KDTree`. This can
58+
affect the speed of the construction and query, as well as the memory
59+
required to store the tree. The optimal value depends on the
60+
nature of the problem.
61+
metric : string or callable, default 'minkowski'
62+
metric used for the distance computation. Any metric from scikit-learn
63+
or scipy.spatial.distance can be used.
64+
If metric is a callable function, it is called on each
65+
pair of instances (rows) and the resulting value recorded. The callable
66+
should take two arrays as input and return one value indicating the
67+
distance between them. This works for Scipy's metrics, but is less
68+
efficient than passing the metric name as a string.
69+
p : integer, optional (default = 2)
70+
Parameter for the Minkowski metric
71+
metric_params : dict, optional (default = None)
72+
Additional keyword arguments for the metric function.
73+
n_jobs : int, optional (default = 1)
74+
The number of parallel jobs to run for neighbors search.
75+
If ``-1``, then the number of jobs is set to the number of CPU cores.
76+
Affects only kneighbors and kneighbors_graph methods.
77+
novelty : bool (default=False)
78+
By default, LocalOutlierFactor is only meant to be used for outlier
79+
detection (novelty=False). Set novelty to True if you want to use
80+
LocalOutlierFactor for novelty detection. In this case be aware that
81+
that you should only use predict, decision_function and score_samples
82+
on new unseen data and not on the training set.
83+
"""
84+
85+
_tags = {
86+
"capability:multivariate": True,
87+
"capability:univariate": True,
88+
"capability:missing_values": False,
89+
"fit_is_empty": False,
90+
"python_dependencies": ["pyod"],
91+
}
92+
93+
def __init__(
94+
self,
95+
n_neighbors: int = 20,
96+
algorithm: Optional[str] = "auto",
97+
leaf_size: int = 30,
98+
metric: str = "minkowski",
99+
p: int = 2,
100+
metric_params: Optional[dict] = None,
101+
n_jobs: int = 1,
102+
window_size: int = 10,
103+
stride: int = 1,
104+
):
105+
_check_soft_dependencies(*self._tags["python_dependencies"])
106+
from pyod.models.lof import LOF as PyOD_LOF
107+
108+
# Set a default contamination value internally
109+
contamination = 0.1
110+
111+
model = PyOD_LOF(
112+
n_neighbors=n_neighbors,
113+
algorithm=algorithm,
114+
leaf_size=leaf_size,
115+
metric=metric,
116+
p=p,
117+
metric_params=metric_params,
118+
n_jobs=n_jobs,
119+
contamination=contamination, # Only for PyOD LOF
120+
novelty=False, # Initialize unsupervised LOF (novelty=False)
121+
)
122+
self.n_neighbors = n_neighbors
123+
self.algorithm = algorithm
124+
self.leaf_size = leaf_size
125+
self.metric = metric
126+
self.p = p
127+
self.metric_params = metric_params
128+
self.n_jobs = n_jobs
129+
super().__init__(pyod_model=model, window_size=window_size, stride=stride)
130+
131+
def _fit(self, X: np.ndarray, y: Union[np.ndarray, None] = None) -> None:
132+
# Set novelty to True for semi-supervised learning
133+
self.pyod_model.novelty = True
134+
super()._fit(X, y)
135+
136+
def _predict(self, X: np.ndarray) -> np.ndarray:
137+
return super()._predict(X)
138+
139+
def _fit_predict(
140+
self, X: np.ndarray, y: Union[np.ndarray, None] = None
141+
) -> np.ndarray:
142+
# Set novelty to False for unsupervised learning
143+
self.pyod_model.novelty = False
144+
return super()._fit_predict(X, y)
145+
146+
@classmethod
147+
def _get_test_params(cls, parameter_set="default"):
148+
"""Return testing parameter settings for the estimator.
149+
150+
Parameters
151+
----------
152+
parameter_set : str, default="default"
153+
Name of the set of test parameters to return, for use in tests.
154+
155+
Returns
156+
-------
157+
params : dict
158+
Parameters to create testing instances of the class.
159+
Each dict corresponds to parameters that will create an "interesting"
160+
test instance.
161+
"""
162+
# Define a test parameter set with different combinations of parameters
163+
return {
164+
"n_neighbors": 5,
165+
"leaf_size": 10,
166+
"p": 2,
167+
"window_size": 10,
168+
"stride": 2,
169+
}

0 commit comments

Comments
 (0)