From 0126b738ce943378873365d140f6d0b13b37a52c Mon Sep 17 00:00:00 2001 From: Adjorn van Engelenhoven <48053700+amobular@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:51:10 +0100 Subject: [PATCH] Reformatting `save` and `dump` functions. (#87) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Added an optional argument to add a custom optimizer. * 🚨 Linting * 🐛 Removed second compile for the quantile rnn. * 📝 Added optimizer docstring. * 🎨 Cleaned up repeated code in dump and load functions for classes inheriting BaseTimeseriesRegressor. * 🔖 Changed release version to 4.0.0 * 📝 Fixed CHANGELOG.md to have the right version. * 🚨 Fixed linting problems. * 🚨 Fixed linting problems. * 🐛 Fixed outliers shown in the quantile plots. --------- Co-authored-by: Adjorn --- CHANGELOG.md | 7 ++++ pyproject.toml | 2 +- sam/models/base_model.py | 65 ++++++++++++++++++++++++++---- sam/models/constant_model.py | 49 ++++------------------ sam/models/lasso_model.py | 57 +++++++------------------- sam/models/mlp_model.py | 42 ++++--------------- sam/visualization/quantile_plot.py | 4 +- 7 files changed, 98 insertions(+), 128 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8aee2af..8b7c4e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ Version X.Y.Z stands for: ------------- +## Version 4.0.0 + +### Changes +- The `dump` and `load` functions are now inherited from the BaseTimeseriesRegressor. +- Added abstract functions `dump_parameters` and `load_parameters` for dumping and loading model files. +- Implemented `dump_parameters` and `load_parameters` for models. +- Values in the `_interactive_quantile_plot` and `_static_quantile_plot` functions are now also marked as outliers when they are *equal* to the quantile boundaries, not only when they fall outside them. 
## Version 3.2.1 ### Changes diff --git a/pyproject.toml b/pyproject.toml index 2bf18fc..95c16ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ packages = [ [project] name = "sam" -version = "3.2.1" +version = "4.0.0" description = "Time series anomaly detection and forecasting" readme = "README.md" requires-python = ">=3.9" diff --git a/sam/models/base_model.py b/sam/models/base_model.py index 99ea731..6074a7a 100644 --- a/sam/models/base_model.py +++ b/sam/models/base_model.py @@ -1,7 +1,8 @@ import warnings from abc import ABC, abstractmethod from operator import itemgetter -from typing import Callable, List, Sequence, Tuple, Union +from pathlib import Path +from typing import Callable, List, Sequence, Tuple, Union, Any import numpy as np import pandas as pd @@ -643,11 +644,12 @@ def score(self, X: pd.DataFrame, y: pd.Series) -> float: return score @abstractmethod - def dump(self, foldername: str, prefix: str = "model") -> None: - """Save a model to disk + def dump_parameters(self, foldername: str, prefix: str = "model") -> None: + """ + Save a model to disk This abstract method needs to be implemented by any class inheriting from - BaseTimeseriesRegressor. This function dumps the SAM model to disk. + BaseTimeseriesRegressor. This function dumps the SAM model parameters to disk. Parameters ---------- @@ -656,11 +658,50 @@ def dump(self, foldername: str, prefix: str = "model") -> None: prefix : str, optional The prefix used in the filename, by default "model" """ - return None + ... - @classmethod + def dump(self, foldername: str, prefix: str = "model"): + """ + Writes the following files: + * prefix.pkl + * prefix.h5 + + to the folder given by foldername. 
prefix is configurable, and is + 'model' by default + + Overwrites the abstract method from BaseTimeseriesRegressor + + Parameters + ---------- + foldername: str + The name of the folder to save the model + prefix: str, optional (Default='model') + The name of the model + """ + # This function only works if the estimator is fitted + import cloudpickle + + backup = None + if hasattr(self, "model_"): + check_is_fitted(self, "model_") + self.dump_parameters(foldername=foldername, prefix=prefix) + # Set the models to None temporarily, because they can't be pickled + backup, self.model_ = self.model_, None + + foldername = Path(foldername) + + with open(foldername / (prefix + ".pkl"), "wb") as f: + cloudpickle.dump(self, f) + + if backup is not None: + self.model_ = backup + + @staticmethod @abstractmethod - def load(cls, foldername, prefix="model"): + def load_parameters(obj, foldername: str, prefix: str = "model") -> Any: ... + + @classmethod + def load(cls, foldername: str, prefix: str = "model"): """Load a model from disk This abstract method needs to be implemented by any class inheriting from @@ -677,4 +718,12 @@ def load(cls, foldername, prefix="model"): ------- The SAM model that has been loaded from disk """ - return None + import cloudpickle + + with open(Path(foldername) / (prefix + ".pkl"), "rb") as f: + obj = cloudpickle.load(f) + + model = obj.load_parameters(obj, foldername=foldername, prefix=prefix) + if model is not None: + obj.model_ = model + return obj diff --git a/sam/models/constant_model.py b/sam/models/constant_model.py index f95a2d6..7cc1856 100644 --- a/sam/models/constant_model.py +++ b/sam/models/constant_model.py @@ -288,49 +288,16 @@ def predict( else: return prediction - def dump(self, foldername: str, prefix: str = "model") -> None: - """ - Writes the instanced model to foldername/prefix.pkl - - prefix is configurable, and is 'model' by default - - Overwrites the abstract method from SamQuantileRegressor - - Parameters - ---------- - 
foldername: str - The name of the folder to save the model - prefix: str, optional (Default='model') - The name of the model - """ - # This function only works if the estimator is fitted - check_is_fitted(self, "model_") - + def dump_parameters(self, foldername: str, prefix: str = "model") -> None: import cloudpickle - foldername = Path(foldername) - - with open(foldername / (prefix + ".pkl"), "wb") as f: - cloudpickle.dump(self, f) + with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f: + cloudpickle.dump(self.model_, f) - @classmethod - def load(cls, foldername, prefix="model") -> Callable: - """ - Reads and loads the model located at foldername/prefix.pkl - - prefix is configurable, and is 'model' by default - Output is an entire instance of the fitted model that was saved - - Overwrites the abstract method from SamQuantileRegressor - - Returns - ------- - A fitted ConstantTimeseriesRegressor object - """ + @staticmethod + def load_parameters(obj, foldername: str, prefix: str = "model") -> Any: import cloudpickle - foldername = Path(foldername) - with open(foldername / (prefix + ".pkl"), "rb") as f: - obj = cloudpickle.load(f) - - return obj + with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f: + model = cloudpickle.load(f) + return model diff --git a/sam/models/lasso_model.py b/sam/models/lasso_model.py index 2eda255..31e6c87 100644 --- a/sam/models/lasso_model.py +++ b/sam/models/lasso_model.py @@ -1,5 +1,5 @@ -import os -from typing import Callable, Sequence, Tuple, Union +from pathlib import Path +from typing import Callable, Sequence, Tuple, Union, Any import numpy as np import pandas as pd @@ -191,43 +191,16 @@ def predict( else: return prediction - def dump(self, foldername: str, prefix: str = "model") -> None: - """Save a model to disk - - This abstract method needs to be implemented by any class inheriting from - SamQuantileRegressor. This function dumps the SAM model to disk. 
- - Parameters - ---------- - foldername : str - The folder location where to save the model - prefix : str, optional - The prefix used in the filename, by default "model" - """ - import joblib - - if not os.path.exists(foldername): - os.makedirs(foldername) - joblib.dump(self, os.path.join(foldername, f"{prefix}.pkl")) - - @classmethod - def load(cls, foldername, prefix="model") -> Callable: - """Load a model from disk - - This abstract method needs to be implemented by any class inheriting from - SamQuantileRegressor. This function loads a SAM model from disk. - - Parameters - ---------- - foldername : str - The folder location where the model is stored - prefix : str, optional - The prefix used in the filename, by default "model" - - Returns - ------- - The SAM model that has been loaded from disk - """ - import joblib - - return joblib.load(os.path.join(foldername, f"{prefix}.pkl")) + def dump_parameters(self, foldername: str, prefix: str = "model") -> None: + import cloudpickle + + with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f: + cloudpickle.dump(self.model_, f) + + @staticmethod + def load_parameters(obj, foldername: str, prefix: str = "model") -> Any: + import cloudpickle + + with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f: + model = cloudpickle.load(f) + return model diff --git a/sam/models/mlp_model.py b/sam/models/mlp_model.py index b6e5f08..312f11a 100644 --- a/sam/models/mlp_model.py +++ b/sam/models/mlp_model.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, Sequence, Tuple, Union, Optional +from typing import Callable, Sequence, Tuple, Union, Optional, Any import numpy as np import pandas as pd @@ -343,10 +343,9 @@ def predict( else: return prediction - def dump(self, foldername: Union[str, Path], prefix: str = "model") -> None: + def dump_parameters(self, foldername: str, prefix: str = "model") -> None: """ Writes the following files: - * prefix.pkl * prefix.h5 to the folder given by 
foldername. prefix is configurable, and is @@ -361,54 +360,29 @@ def dump(self, foldername: Union[str, Path], prefix: str = "model") -> None: prefix: str, optional (Default='model') The name of the model """ - # This function only works if the estimator is fitted check_is_fitted(self, "model_") - - import cloudpickle - foldername = Path(foldername) - - # TEMPORARY self.model_.save(foldername / (prefix + ".h5")) - # Set the models to None temporarily, because they can't be pickled - backup, self.model_ = self.model_, None - - with open(foldername / (prefix + ".pkl"), "wb") as f: - cloudpickle.dump(self, f) - - # Set it back - self.model_ = backup - - @classmethod - def load(cls, foldername: Union[str, Path], prefix="model"): + @staticmethod + def load_parameters(obj, foldername: str, prefix: str = "model") -> Any: """ - Reads the following files: - * prefix.pkl + Loads the file: * prefix.h5 from the folder given by foldername. prefix is configurable, and is 'model' by default - Output is an entire instance of the fitted model that was saved + Output is the `model_` attribute of the MLPTimeseriesRegressor class. 
Overwrites the abstract method from BaseTimeseriesRegressor - - Returns - ------- - Keras model """ - import cloudpickle - from tensorflow import keras + import keras foldername = Path(foldername) - with open(foldername / (prefix + ".pkl"), "rb") as f: - obj = cloudpickle.load(f) - loss = obj._get_loss() - obj.model_ = keras.models.load_model( + return keras.models.load_model( foldername / (prefix + ".h5"), custom_objects={"mse_tilted": loss} ) - return obj def _get_loss(self) -> Union[str, Callable]: """ diff --git a/sam/visualization/quantile_plot.py b/sam/visualization/quantile_plot.py index d7e0502..2118af4 100644 --- a/sam/visualization/quantile_plot.py +++ b/sam/visualization/quantile_plot.py @@ -271,7 +271,7 @@ def _interactive_quantile_plot( if outlier_min_q is not None and outliers is None: valid_low = y_hat[these_cols[col_order[n_quants - 1 - (outlier_min_q - 1)]]] valid_high = y_hat[these_cols[col_order[n_quants + (outlier_min_q - 1)]]] - outliers = (y_true > valid_high) | (y_true < valid_low) + outliers = (y_true >= valid_high) | (y_true <= valid_low) outliers = outliers.astype(int) k = np.ones(outlier_window) outliers = ( @@ -379,7 +379,7 @@ def _static_quantile_plot( if outlier_min_q is not None and outliers is None: valid_low = y_hat[these_cols[col_order[n_quants - 1 - (outlier_min_q - 1)]]] valid_high = y_hat[these_cols[col_order[n_quants + (outlier_min_q - 1)]]] - outliers = (y_true > valid_high) | (y_true < valid_low) + outliers = (y_true >= valid_high) | (y_true <= valid_low) outliers = outliers.astype(int) k = np.ones(outlier_window) outliers = (