diff --git a/aeon/anomaly_detection/deep_learning/__init__.py b/aeon/anomaly_detection/deep_learning/__init__.py
new file mode 100644
index 0000000000..47325d4253
--- /dev/null
+++ b/aeon/anomaly_detection/deep_learning/__init__.py
@@ -0,0 +1,5 @@
+"""Deep learning based anomaly detector."""
+
+__all__ = ["LSTM_AD"]
+
+from aeon.anomaly_detection.deep_learning._lstm_ad import LSTM_AD
diff --git a/aeon/anomaly_detection/deep_learning/_lstm_ad.py b/aeon/anomaly_detection/deep_learning/_lstm_ad.py
new file mode 100644
index 0000000000..566f154bbd
--- /dev/null
+++ b/aeon/anomaly_detection/deep_learning/_lstm_ad.py
@@ -0,0 +1,426 @@
+"""LSTM-AD Anomaly Detector."""
+
+__all__ = ["LSTM_AD"]
+
+import gc
+import os
+import time
+from copy import deepcopy
+
+import numpy as np
+from scipy.stats import multivariate_normal
+from sklearn.covariance import EmpiricalCovariance
+from sklearn.metrics import fbeta_score
+from sklearn.model_selection import train_test_split
+
+from aeon.anomaly_detection.deep_learning.base import BaseDeepAnomalyDetector
+from aeon.networks import LSTMNetwork
+
+
+class LSTM_AD(BaseDeepAnomalyDetector):
+    r"""LSTM-AD anomaly detector.
+
+    LSTM-AD uses a stacked LSTM network for anomaly detection in time series.
+    The network is trained on non-anomalous data and used as a predictor over
+    a number of time steps. The resulting prediction errors are modeled as a
+    multivariate Gaussian distribution, which is used to assess the likelihood
+    of anomalous behavior.
+
+    ``LSTM_AD`` supports univariate and multivariate time series. It can also
+    be fitted on a clean reference time series and used to detect anomalies in
+    a different target time series with the same number of dimensions.
+
+    .. list-table:: Capabilities
+       :stub-columns: 1
+
+       * - Input data format
+         - univariate and multivariate
+       * - Output data format
+         - binary anomaly labels
+       * - Learning Type
+         - supervised
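+
+    At prediction time, each error vector :math:`e = y - \hat{y}` (of dimension
+    :math:`d =` ``prediction_horizon * n_channels``) is scored with the
+    multivariate normal density fitted on held-out validation errors; a sketch
+    of the rule implemented in ``_fit`` and ``_predict`` below:
+
+    .. math::
+
+        p(e) = \frac{1}{\sqrt{(2\pi)^{d} |\Sigma|}}
+        \exp\left(-\frac{1}{2}(e - \mu)^{\top} \Sigma^{-1} (e - \mu)\right)
+
+    A point is flagged as anomalous when :math:`p(e) < \tau`, where the
+    threshold :math:`\tau` is chosen to maximise the :math:`F_1`-score on
+    validation data.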
+
+    Parameters
+    ----------
+    n_layers : int, default=2
+        The number of LSTM layers to be stacked.
+    n_nodes : int, default=64
+        The number of LSTM units in each layer.
+    window_size : int, default=20
+        The size of the sliding window used to split the time series into
+        windows. The bigger the window size, the bigger the anomaly context.
+        If it is too big, however, the detector marks points as anomalous that
+        are not. If it is too small, the detector might miss larger anomalies
+        or contextual anomalies entirely. If ``window_size`` is smaller than
+        the anomaly, the detector might detect only the transitions between
+        normal data and the anomalous subsequence.
+    prediction_horizon : int, default=1
+        The number of future time steps predicted by the LSTM. The default of
+        ``1`` means the LSTM takes ``window_size`` time steps as input and
+        predicts 1 time step in the future.
+    batch_size : int, default=32
+        The number of time steps per gradient update.
+    n_epochs : int, default=1500
+        The number of epochs to train the model.
+    patience : int, default=5
+        The number of epochs to watch before early stopping.
+    verbose : bool, default=False
+        Whether to output extra information.
+    loss : str or keras loss function, default="mse"
+        The loss function used to compile the model.
+    optimizer : keras.optimizers object, default=None
+        The optimizer used to compile the model. If ``None``,
+        ``keras.optimizers.Adam()`` is used.
+    file_path : str, default="./"
+        File path used when saving via the ModelCheckpoint callback.
+    save_best_model : bool, default=False
+        Whether or not to save the best model. If the ModelCheckpoint callback
+        is used, setting this to True prevents the automatic deletion of the
+        best saved model from file, and the user can choose the file name.
+    save_last_model : bool, default=False
+        Whether or not to save the model from the last epoch trained, using
+        the base class method ``save_last_model_to_file``.
+    save_init_model : bool, default=False
+        Whether to save the initialization of the model.
+    best_file_name : str, default="best_model"
+        The name of the file of the best model. If ``save_best_model`` is set
+        to False, this parameter is discarded.
+    last_file_name : str, default="last_model"
+        The name of the file of the last model. If ``save_last_model`` is set
+        to False, this parameter is discarded.
+    init_file_name : str, default="init_model"
+        The name of the file of the init model. If ``save_init_model`` is set
+        to False, this parameter is discarded.
+
+    Notes
+    -----
+    This implementation is inspired by [1]_.
+
+    References
+    ----------
+    .. [1] Malhotra Pankaj, Lovekesh Vig, Gautam Shroff, and Puneet Agarwal.
+       Long Short Term Memory Networks for Anomaly Detection in Time Series.
+       In Proceedings of the European Symposium on Artificial Neural Networks,
+       Computational Intelligence and Machine Learning (ESANN), Vol. 23, 2015.
+       https://www.esann.org/sites/default/files/proceedings/legacy/es2015-56.pdf
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from aeon.datasets import load_anomaly_detection
+    >>> from aeon.anomaly_detection.deep_learning import LSTM_AD
+    >>> X, y = load_anomaly_detection(
+    ...     name=("KDD-TSAD", "001_UCR_Anomaly_DISTORTED1sddb40")
+    ... )
+    >>> detector = LSTM_AD(
+    ...     n_layers=4, n_nodes=64, window_size=10, prediction_horizon=2
+    ... )  # doctest: +SKIP
+    >>> detector.fit(X, y, axis=0)  # doctest: +SKIP
+    LSTM_AD(...)
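+
+    A point-wise 0/1 anomaly prediction can then be obtained with ``predict``
+    (shown for illustration only):
+
+    >>> pred = detector.predict(X, axis=0)  # doctest: +SKIP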
+ """ + + _tags = { + "capability:univariate": True, + "capability:multivariate": True, + "capability:missing_values": False, + "fit_is_empty": False, + "requires_y": True, + "python_dependencies": "tensorflow", + } + + def __init__( + self, + n_layers: int = 2, + n_nodes: int = 64, + window_size: int = 20, + prediction_horizon: int = 1, + batch_size: int = 32, + n_epochs: int = 1500, + patience: int = 5, + verbose: bool = False, + loss="mse", + optimizer=None, + file_path="./", + save_best_model=False, + save_last_model=False, + save_init_model=False, + best_file_name="best_model", + last_file_name="last_model", + init_file_name="init_model", + ): + self.n_layers = n_layers + self.n_nodes = n_nodes + self.window_size = window_size + self.prediction_horizon = prediction_horizon + self.batch_size = batch_size + self.n_epochs = n_epochs + self.patience = patience + self.verbose = verbose + self.loss = loss + self.optimizer = optimizer + self.file_path = file_path + self.save_best_model = save_best_model + self.save_last_model = save_last_model + self.save_init_model = save_init_model + self.best_file_name = best_file_name + self.last_file_name = last_file_name + self.init_file_name = init_file_name + + self.history = None + + super().__init__() + + self._network = LSTMNetwork( + self.n_nodes, self.n_layers, self.prediction_horizon + ) + + def build_model(self, **kwargs): + """Construct a compiled, un-trained, keras model that is ready for training. + + In aeon, time series are stored in numpy arrays of shape (d,m), where d + is the number of dimensions, m is the series length. Keras/tensorflow assume + data is in shape (m,d). This method also assumes (m,d). Transpose should + happen in fit. + + Returns + ------- + output : a compiled Keras Model + """ + import tensorflow as tf + + input_layer, output_layer = self._network.build_network( + (self.window_size, self.n_channels), **kwargs + ) + + self.optimizer_ = ( + tf.keras.optimizers.Adam() if self.optimizer is None else self.optimizer + ) + + model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer) + + model.compile(optimizer=self.optimizer_, loss=self.loss) + + return model + + def _fit(self, X: np.array, y: np.array): + """Fit the model on the data. + + Parameters + ---------- + X: np.ndarray of shape (n_timepoints, n_channels) + The training time series, maybe with anomalies. + y: np.ndarray of shape (n_timepoints,) or (n_timepoints, 1) + Anomaly annotations for the training time series with values 0 or 1. + """ + import tensorflow as tf + + self._check_params(X) + + # Create normal time series if not present + if len(np.unique(y)) == 2: + X_normal = X[y == 0] + y_normal = y[y == 0] + X_anomaly = X[y == 1] + else: + raise ValueError( + "The training time series must have anomaly annotations with values" + "0 for normal and 1 for anomaly." 
+ ) + + # Divide the normal time series into train set and two validation sets for lstm + X_train, X_val, y_train, y_val = train_test_split( + X_normal, y_normal, test_size=0.2, shuffle=False + ) + X_val1, X_val2, y_val1, y_val2 = train_test_split( + X_val, y_val, test_size=0.5, shuffle=False + ) + + X_train_n, y_train_n = _create_sequences( + X_train, self.window_size, self.prediction_horizon + ) + y_train_n = y_train_n.reshape(-1, self.prediction_horizon * self.n_channels) + X_val_1, y_val_1 = _create_sequences( + X_val1, self.window_size, self.prediction_horizon + ) + y_val_1 = y_val_1.reshape(-1, self.prediction_horizon * self.n_channels) + X_val_2, y_val_2 = _create_sequences( + X_val2, self.window_size, self.prediction_horizon + ) + y_val_2 = y_val_2.reshape(-1, self.prediction_horizon * self.n_channels) + + X_anomalies, y_anomalies = _create_sequences( + X_anomaly, self.window_size, self.prediction_horizon + ) + y_anomalies = y_anomalies.reshape(-1, self.prediction_horizon * self.n_channels) + + # Fit LSTM model on the normal train set + # input_shape = (self.window_size, self.n_channels) + + self.training_model_ = self.build_model() + + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + + if self.verbose: + self.training_model_.summary() + + self.file_name_ = ( + self.best_file_name if self.save_best_model else str(time.time_ns()) + ) + + self.callbacks_ = [ + tf.keras.callbacks.EarlyStopping( + monitor="val_loss", patience=self.patience, restore_best_weights=True + ) + ] + + self.history = self.training_model_.fit( + X_train_n, + y_train_n, + validation_data=(X_val_1, y_val_1), + batch_size=self.batch_size, + epochs=self.n_epochs, + verbose=self.verbose, + callbacks=self.callbacks_, + ) + + # Prediction errors on validation set 1 to calculate error vector + predicted_vN1 = self.training_model_.predict(X_val_1) + errors_vN1 = y_val_1 - predicted_vN1 + + # Fit the error vectors to a Gaussian distribution + cov_estimator = EmpiricalCovariance() + cov_estimator.fit(errors_vN1) + + # Mean and covariance matrix of the error distribution + mu = cov_estimator.location_ + cov_matrix = cov_estimator.covariance_ + + # Create a Gaussian Normal Distribution + self.distribution = multivariate_normal(mean=mu, cov=cov_matrix) + + predicted_vN2 = self.training_model_.predict(X_val_2) + predicted_vA = self.training_model_.predict(X_anomalies) + + errors_vN2 = y_val_2 - predicted_vN2 + errors_vA = y_anomalies - predicted_vA + + # Estimate the likelihood of the errors: + p_vN2 = self.distribution.pdf(errors_vN2) + p_vA = self.distribution.pdf(errors_vA) + + # Combine likelihoods and labels + likelihoods = np.concatenate([p_vN2, p_vA]) + true_labels = np.concatenate( + [np.zeros_like(p_vN2), np.ones_like(p_vA)] + ) # 0 for normal, 1 for anomalous + + # Experiment with different thresholds and calculate Fβ-score + self.best_tau = None + self.best_fbeta = -1 + + # Loop over different thresholds + for tau in np.linspace(min(likelihoods), max(likelihoods), 100): + # Classify as anomalous if likelihood < tau + predictions = (likelihoods < tau).astype(int) + + # Calculate Fβ-score (arbitrarily use beta=1.0 for F1-score) + fbeta = fbeta_score(true_labels, predictions, beta=1.0) + + # Track the best threshold and Fβ-score + if fbeta > self.best_fbeta: + self.best_tau = tau + self.best_fbeta = fbeta + + try: + if self.save_best_model: + self.model_ = tf.keras.models.load_model( + self.file_path + self.file_name_ + ".keras", compile=False + ) + else: + 
+
+        # Restore the best checkpointed model; if no checkpoint was written,
+        # fall back to the weights restored by early stopping
+        try:
+            self.model_ = tf.keras.models.load_model(
+                self.file_path + self.file_name_ + ".keras", compile=False
+            )
+            if not self.save_best_model:
+                os.remove(self.file_path + self.file_name_ + ".keras")
+        except FileNotFoundError:
+            self.model_ = deepcopy(self.training_model_)
+
+        if self.save_last_model:
+            self.save_last_model_to_file(file_path=self.file_path)
+
+        gc.collect()
+        return self
+
+    def _predict(self, X):
+        X_, y_ = _create_sequences(X, self.window_size, self.prediction_horizon)
+        y_ = y_.reshape(-1, self.prediction_horizon * self.n_channels)
+        predict_test = self.model_.predict(X_)
+        errors = y_ - predict_test
+        likelihoods = self.distribution.pdf(errors)
+        anomalies = (likelihoods < self.best_tau).astype(int)
+        # Pad the start of the series, for which no prediction can be made
+        padding = np.zeros(X.shape[0] - len(anomalies))
+        prediction = np.concatenate([padding, anomalies])
+        return np.array(prediction, dtype=int)
+
+    def _check_params(self, X: np.ndarray) -> None:
+        if X.ndim == 1:
+            self.n_channels = 1
+        elif X.ndim == 2:
+            self.n_channels = X.shape[1]
+        else:
+            raise ValueError(
+                "The training time series must be of shape (n_timepoints,) or "
+                "(n_timepoints, n_channels)."
+            )
+
+        if self.window_size < 1 or self.window_size > X.shape[0]:
+            raise ValueError(
+                "The window size must be at least 1 and at most the length of the "
+                "time series."
+            )
+        if self.batch_size < 1 or self.batch_size > X.shape[0]:
+            raise ValueError(
+                "The batch size must be at least 1 and at most the length of the "
+                "time series."
+            )
+
+
+def _create_sequences(data, window_size, prediction_horizon):
+    """Create input and output sequences using a sliding window to train the LSTM.
+
+    Parameters
+    ----------
+    data : np.ndarray
+        The time series of shape (n_timepoints, n_channels).
+    window_size : int
+        The length of the sliding window.
+    prediction_horizon : int
+        The number of future time steps predicted by the model.
+
+    Returns
+    -------
+    X : np.ndarray
+        The array of input sequences of shape
+        (n_timepoints - window_size - prediction_horizon + 1, window_size,
+        n_channels).
+    y : np.ndarray
+        The array of output sequences of shape
+        (n_timepoints - window_size - prediction_horizon + 1,
+        prediction_horizon, n_channels).
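+
+    Examples
+    --------
+    A minimal shape check, for illustration only:
+
+    >>> import numpy as np
+    >>> X, y = _create_sequences(np.arange(10).reshape(-1, 1), 4, 2)
+    >>> X.shape, y.shape
+    ((5, 4, 1), (5, 2, 1))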
+    """
+    X, y = [], []
+    for i in range(len(data) - window_size - prediction_horizon + 1):
+        X.append(data[i : (i + window_size)])
+        y.append(data[(i + window_size) : (i + window_size + prediction_horizon)])
+    return np.array(X), np.array(y)
diff --git a/aeon/anomaly_detection/deep_learning/base.py b/aeon/anomaly_detection/deep_learning/base.py
new file mode 100644
index 0000000000..9053c1a568
--- /dev/null
+++ b/aeon/anomaly_detection/deep_learning/base.py
@@ -0,0 +1,130 @@
+"""
+Abstract base class for the Keras neural network anomaly detectors.
+
+This class sits between BaseAnomalyDetector and the deep learning anomaly
+detectors so that tags and shared behaviour can be generalised in one place.
+"""
+
+__all__ = ["BaseDeepAnomalyDetector"]
+
+from abc import ABC, abstractmethod
+
+from aeon.anomaly_detection.base import BaseAnomalyDetector
+
+
+class BaseDeepAnomalyDetector(BaseAnomalyDetector, ABC):
+    """Abstract base class for deep learning time series anomaly detection.
+
+    Provides shared functionality for the deep learning anomaly detectors and
+    a new abstract method for building a model.
+
+    Parameters
+    ----------
+    batch_size : int, default=40
+        Training batch size for the model.
+    last_file_name : str, default="last_model"
+        The name of the file of the last model, used only if
+        ``save_last_model_to_file`` is used.
+
+    Attributes
+    ----------
+    model_ : None or keras.models.Model
+        The fitted Keras model, set by the subclass in ``fit``.
+    """
+
+    _tags = {
+        "capability:multivariate": True,
+        "algorithm_type": "deeplearning",
+        "non_deterministic": True,
+        "cant_pickle": True,
+        "python_dependencies": "tensorflow",
+    }
+
+    def __init__(
+        self,
+        batch_size=40,
+        last_file_name="last_model",
+    ):
+        self.batch_size = batch_size
+        self.last_file_name = last_file_name
+        self.model_ = None
+        self.history = None
+
+        super().__init__(axis=0)
+
+    @abstractmethod
+    def build_model(self, input_shape):
+        """Construct a compiled, un-trained, keras model that is ready for training.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            The shape of the data fed into the input layer.
+
+        Returns
+        -------
+        A compiled Keras Model.
+        """
+        ...
+
+    def summary(self):
+        """Summary function to return the losses/metrics for model fit.
+
+        Returns
+        -------
+        history : dict or None
+            Dictionary containing the model's train/validation losses and
+            metrics.
+        """
+        return self.history.history if self.history is not None else None
+
+    def save_last_model_to_file(self, file_path="./"):
+        """Save the last epoch of the trained deep learning model.
+
+        Parameters
+        ----------
+        file_path : str, default="./"
+            The directory where the model will be saved.
+
+        Returns
+        -------
+        None
+        """
+        self.model_.save(file_path + self.last_file_name + ".keras")
+
+    def load_model(self, model_path):
+        """Load a pre-trained keras model instead of fitting.
+
+        When calling this function, all functionalities such as ``predict``
+        can be used with the loaded model.
+
+        Parameters
+        ----------
+        model_path : str (path including model name and extension)
+            The directory where the model will be loaded from, including the
+            model name with a ".keras" extension.
+            Example: model_path="path/to/file/best_model.keras"
+
+        Returns
+        -------
+        None
+        """
+        import tensorflow as tf
+
+        self.model_ = tf.keras.models.load_model(model_path)
+        self.is_fitted = True
+
+    def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
+        import tensorflow as tf
+
+        model_checkpoint_ = tf.keras.callbacks.ModelCheckpoint(
+            filepath=file_path + file_name + ".keras",
+            monitor="loss",
+            save_best_only=True,
+        )
+
+        if isinstance(callbacks, list):
+            return callbacks + [model_checkpoint_]
+        else:
+            return [callbacks] + [model_checkpoint_]
diff --git a/aeon/anomaly_detection/tests/test_lstm_ad.py b/aeon/anomaly_detection/tests/test_lstm_ad.py
new file mode 100644
index 0000000000..1256c5c25a
--- /dev/null
+++ b/aeon/anomaly_detection/tests/test_lstm_ad.py
@@ -0,0 +1,58 @@
+"""Tests for the LSTM_AD class."""
+
+import numpy as np
+import pytest
+
+from aeon.anomaly_detection.deep_learning import LSTM_AD
+from aeon.testing.data_generation._legacy import make_series
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies("tensorflow", severity="none"),
+    reason="skip test if required soft dependency not available",
+)
+def test_lstmad_univariate():
+    """Test LSTM_AD univariate output."""
+    series = make_series(n_timepoints=1000, return_numpy=True, random_state=42)
+    labels = np.zeros(1000).astype(int)
+
+    # Create anomalies
+    anomaly_indices = np.random.choice(1000, 20, replace=False)
+    series[anomaly_indices] += np.random.normal(loc=0, scale=4, size=(20,))
+    labels[anomaly_indices] = 1
+
+    ad = LSTM_AD(
+        n_layers=4, n_nodes=16, window_size=10, prediction_horizon=1, n_epochs=1
+    )
+    ad.fit(series, labels, axis=0)
+    pred = ad.predict(series, axis=0)
+
+    assert pred.shape == (1000,)
+    assert pred.dtype == np.int_
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies("tensorflow", severity="none"),
+    reason="skip test if required soft dependency not available",
+)
+def test_lstmad_multivariate():
+    """Test LSTM_AD multivariate output."""
+    series = make_series(
+        n_timepoints=1000, n_columns=3, return_numpy=True, random_state=42
+    )
+    labels = np.zeros(1000).astype(int)
+
+    # Create anomalies
+    anomaly_indices = np.random.choice(1000, 50, replace=False)
+    series[anomaly_indices] += np.random.normal(loc=0, scale=4, size=(50, 3))
+    labels[anomaly_indices] = 1
+
+    ad = LSTM_AD(
+        n_layers=4, n_nodes=16, window_size=10, prediction_horizon=1, n_epochs=1
+    )
+    ad.fit(series, labels, axis=0)
+    pred = ad.predict(series, axis=0)
+
+    assert pred.shape == (1000,)
+    assert pred.dtype == np.int_
diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py
index 5d8a87f2a8..6671baf5ef 100644
--- a/aeon/networks/__init__.py
+++ b/aeon/networks/__init__.py
@@ -18,6 +18,7 @@
     "AEAttentionBiGRUNetwork",
     "AEDRNNNetwork",
     "AEBiGRUNetwork",
+    "LSTMNetwork",
 ]
 from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork
 from aeon.networks._ae_bgru import AEBiGRUNetwork
@@ -31,6 +32,7 @@
 from aeon.networks._fcn import FCNNetwork
 from aeon.networks._inception import InceptionNetwork
 from aeon.networks._lite import LITENetwork
+from aeon.networks._lstm import LSTMNetwork
 from aeon.networks._mlp import MLPNetwork
 from aeon.networks._resnet import ResNetNetwork
 from aeon.networks._tapnet import TapNetNetwork
diff --git a/aeon/networks/_lstm.py b/aeon/networks/_lstm.py
new file mode 100644
index 0000000000..dbabd3c482
--- /dev/null
+++ b/aeon/networks/_lstm.py
@@ -0,0 +1,66 @@
+"""Long Short Term Memory Network (LSTMNetwork)."""
+
+from aeon.networks.base import BaseDeepLearningNetwork
+
+
+class LSTMNetwork(BaseDeepLearningNetwork):
+    """Establish the network structure for an LSTM.
+
+    Inspired by [1]_.
+
+    Parameters
+    ----------
+    n_nodes : int, default=64
+        The number of LSTM units in each layer.
+    n_layers : int, default=2
+        The number of LSTM layers.
+    prediction_horizon : int, default=1
+        The number of future time steps predicted by the network.
+
+    References
+    ----------
+    .. [1] Malhotra Pankaj, Lovekesh Vig, Gautam Shroff, and Puneet Agarwal.
+       Long Short Term Memory Networks for Anomaly Detection in Time Series.
+       In Proceedings of the European Symposium on Artificial Neural Networks,
+       Computational Intelligence and Machine Learning (ESANN), Vol. 23, 2015.
+       https://www.esann.org/sites/default/files/proceedings/legacy/es2015-56.pdf
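+
+    Examples
+    --------
+    A minimal construction sketch (illustration only, requires ``tensorflow``):
+
+    >>> from aeon.networks import LSTMNetwork
+    >>> network = LSTMNetwork(n_nodes=32, n_layers=2, prediction_horizon=1)
+    >>> input_layer, output_layer = network.build_network(
+    ...     (20, 1)
+    ... )  # doctest: +SKIP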
+    """
+
+    def __init__(
+        self,
+        n_nodes=64,
+        n_layers=2,
+        prediction_horizon=1,
+    ):
+        self.n_nodes = n_nodes
+        self.n_layers = n_layers
+        self.prediction_horizon = prediction_horizon
+        super().__init__()
+
+    def build_network(self, input_shape, **kwargs):
+        """Construct an LSTM network and return its input and output layers.
+
+        Parameters
+        ----------
+        input_shape : tuple of shape = (window_size (w), n_channels (d))
+            The shape of the data fed into the input layer.
+
+        Returns
+        -------
+        input_layer : a keras layer
+        output_layer : a keras layer
+        """
+        import tensorflow as tf
+
+        # Input layer for the LSTM model
+        input_layer = tf.keras.layers.Input(shape=input_shape)
+
+        # Stack n_layers - 1 LSTM layers that return full sequences
+        x = input_layer
+        for _ in range(self.n_layers - 1):
+            x = tf.keras.layers.LSTM(self.n_nodes, return_sequences=True)(x)
+
+        # Last LSTM layer with return_sequences=False to output the final
+        # representation
+        x = tf.keras.layers.LSTM(self.n_nodes, return_sequences=False)(x)
+
+        # Dense output layer: one value per channel per predicted time step
+        output_layer = tf.keras.layers.Dense(
+            input_shape[1] * self.prediction_horizon
+        )(x)
+
+        return input_layer, output_layer
diff --git a/aeon/networks/tests/test_all_networks.py b/aeon/networks/tests/test_all_networks.py
index 106a5b8b4f..c06c5d6759 100644
--- a/aeon/networks/tests/test_all_networks.py
+++ b/aeon/networks/tests/test_all_networks.py
@@ -39,7 +39,10 @@ def test_all_networks_functionality(network):
     if _check_soft_dependencies(
         network._config["python_dependencies"], severity="none"
     ) and _check_python_version(network._config["python_version"], severity="none"):
-        my_network = network()
+        if network.__name__ == "LSTMNetwork":
+            my_network = network(n_nodes=50, n_layers=2, prediction_horizon=1)
+        else:
+            my_network = network()
 
         if network._config["structure"] == "auto-encoder":
             encoder, decoder = my_network.build_network(input_shape=input_shape)
diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst
index a3b36171c5..bdd3e2558d 100644
--- a/docs/api_reference/anomaly_detection.rst
+++ b/docs/api_reference/anomaly_detection.rst
@@ -80,3 +80,4 @@ Detectors
     PyODAdapter
     STRAY
     STOMP
+    LSTM_AD