Skip to content

Commit

Permalink
Reformatting save and dump functions. (#87)
Browse files Browse the repository at this point in the history
* ✨ Added an optional argument to add a custom optimizer.

* 🚨 Linting

* 🐛 Removed second compile for the quantile rnn.

* 📝 Added optimizer docstring.

* 🎨 Cleaned up repeated code in dump and load functions for classes inheriting BaseTimeseriesRegressor.

* 🔖 Changed release version to 4.0.0

* 📝 Fixed CHANGELOG.md to have the right version.

* 🚨 Fixed linting problems.

* 🚨 Fixed linting problems.

* 🐛 Fixed outliers shown in the quantile plots.

---------

Co-authored-by: Adjorn <[email protected]>
  • Loading branch information
amobular and Adjorn authored Nov 14, 2024
1 parent a2dd806 commit 0126b73
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 128 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ Version X.Y.Z stands for:

-------------

## Version 4.0.0

### Changes
- The `dump` and `load` functions are now inherited from the BaseTimeseriesRegressor.
- Added abstract functions `dump_parameters` and `load_parameters` for dumping and loading model files.
- Implemented `dump_parameters` and `load_parameters` for the constant, lasso and MLP timeseries models.
- In the `_interactive_quantile_plot` and `_static_quantile_plot` functions, values that are *equal* to a quantile boundary are now also flagged as outliers (previously only values strictly outside the boundaries were).
## Version 3.2.1

### Changes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ packages = [

[project]
name = "sam"
version = "3.2.1"
version = "4.0.0"
description = "Time series anomaly detection and forecasting"
readme = "README.md"
requires-python = ">=3.9"
Expand Down
65 changes: 57 additions & 8 deletions sam/models/base_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import warnings
from abc import ABC, abstractmethod
from operator import itemgetter
from typing import Callable, List, Sequence, Tuple, Union
from pathlib import Path
from typing import Callable, List, Sequence, Tuple, Union, Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -643,11 +644,12 @@ def score(self, X: pd.DataFrame, y: pd.Series) -> float:
return score

@abstractmethod
def dump(self, foldername: str, prefix: str = "model") -> None:
"""Save a model to disk
def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
"""
Save a model to disk
This abstract method needs to be implemented by any class inheriting from
BaseTimeseriesRegressor. This function dumps the SAM model to disk.
BaseTimeseriesRegressor. This function dumps the SAM model parameters to disk.
Parameters
----------
Expand All @@ -656,11 +658,50 @@ def dump(self, foldername: str, prefix: str = "model") -> None:
prefix : str, optional
The prefix used in the filename, by default "model"
"""
return None
...

@classmethod
def dump(self, foldername: str, prefix: str = "model"):
"""
Writes the following files:
* prefix.pkl
* prefix.h5
to the folder given by foldername. prefix is configurable, and is
'model' by default
Overwrites the abstract method from BaseTimeseriesRegressor
Parameters
----------
foldername: str
The name of the folder to save the model
prefix: str, optional (Default='model')
The name of the model
"""
# This function only works if the estimator is fitted
import cloudpickle

backup = None
if hasattr(self, "model_"):
check_is_fitted(self, "model_")
self.dump_parameters(foldername=foldername, prefix=prefix)
# Set the models to None temporarily, because they can't be pickled
backup, self.model_ = self.model_, None

foldername = Path(foldername)

with open(foldername / (prefix + ".pkl"), "wb") as f:
cloudpickle.dump(self, f)

if backup is not None:
self.model_ = backup

@staticmethod
@abstractmethod
def load(cls, foldername, prefix="model"):
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any: ...

@classmethod
def load(cls, foldername: str, prefix: str = "model"):
"""Load a model from disk
This abstract method needs to be implemented by any class inheriting from
Expand All @@ -677,4 +718,12 @@ def load(cls, foldername, prefix="model"):
-------
The SAM model that has been loaded from disk
"""
return None
import cloudpickle

with open(Path(foldername) / (prefix + ".pkl"), "rb") as f:
obj = cloudpickle.load(f)

model = obj.load_parameters(obj, foldername=foldername, prefix=prefix)
if model is not None:
obj.model_ = model
return obj
49 changes: 8 additions & 41 deletions sam/models/constant_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,49 +288,16 @@ def predict(
else:
return prediction

def dump(self, foldername: str, prefix: str = "model") -> None:
"""
Writes the instanced model to foldername/prefix.pkl
prefix is configurable, and is 'model' by default
Overwrites the abstract method from SamQuantileRegressor
Parameters
----------
foldername: str
The name of the folder to save the model
prefix: str, optional (Default='model')
The name of the model
"""
# This function only works if the estimator is fitted
check_is_fitted(self, "model_")

def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
import cloudpickle

foldername = Path(foldername)

with open(foldername / (prefix + ".pkl"), "wb") as f:
cloudpickle.dump(self, f)
with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f:
cloudpickle.dump(self.model_, f)

@classmethod
def load(cls, foldername, prefix="model") -> Callable:
"""
Reads and loads the model located at foldername/prefix.pkl
prefix is configurable, and is 'model' by default
Output is an entire instance of the fitted model that was saved
Overwrites the abstract method from SamQuantileRegressor
Returns
-------
A fitted ConstantTimeseriesRegressor object
"""
@staticmethod
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
import cloudpickle

foldername = Path(foldername)
with open(foldername / (prefix + ".pkl"), "rb") as f:
obj = cloudpickle.load(f)

return obj
with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f:
model = cloudpickle.load(f)
return model
57 changes: 15 additions & 42 deletions sam/models/lasso_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import Callable, Sequence, Tuple, Union
from pathlib import Path
from typing import Callable, Sequence, Tuple, Union, Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -191,43 +191,16 @@ def predict(
else:
return prediction

def dump(self, foldername: str, prefix: str = "model") -> None:
"""Save a model to disk
This abstract method needs to be implemented by any class inheriting from
SamQuantileRegressor. This function dumps the SAM model to disk.
Parameters
----------
foldername : str
The folder location where to save the model
prefix : str, optional
The prefix used in the filename, by default "model"
"""
import joblib

if not os.path.exists(foldername):
os.makedirs(foldername)
joblib.dump(self, os.path.join(foldername, f"{prefix}.pkl"))

@classmethod
def load(cls, foldername, prefix="model") -> Callable:
"""Load a model from disk
This abstract method needs to be implemented by any class inheriting from
SamQuantileRegressor. This function loads a SAM model from disk.
Parameters
----------
foldername : str
The folder location where the model is stored
prefix : str, optional
The prefix used in the filename, by default "model"
Returns
-------
The SAM model that has been loaded from disk
"""
import joblib

return joblib.load(os.path.join(foldername, f"{prefix}.pkl"))
def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
import cloudpickle

with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f:
cloudpickle.dump(self.model_, f)

@staticmethod
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
import cloudpickle

with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f:
model = cloudpickle.load(f)
return model
42 changes: 8 additions & 34 deletions sam/models/mlp_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Callable, Sequence, Tuple, Union, Optional
from typing import Callable, Sequence, Tuple, Union, Optional, Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -343,10 +343,9 @@ def predict(
else:
return prediction

def dump(self, foldername: Union[str, Path], prefix: str = "model") -> None:
def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
"""
Writes the following files:
* prefix.pkl
* prefix.h5
to the folder given by foldername. prefix is configurable, and is
Expand All @@ -361,54 +360,29 @@ def dump(self, foldername: Union[str, Path], prefix: str = "model") -> None:
prefix: str, optional (Default='model')
The name of the model
"""
# This function only works if the estimator is fitted
check_is_fitted(self, "model_")

import cloudpickle

foldername = Path(foldername)

# TEMPORARY
self.model_.save(foldername / (prefix + ".h5"))

# Set the models to None temporarily, because they can't be pickled
backup, self.model_ = self.model_, None

with open(foldername / (prefix + ".pkl"), "wb") as f:
cloudpickle.dump(self, f)

# Set it back
self.model_ = backup

@classmethod
def load(cls, foldername: Union[str, Path], prefix="model"):
@staticmethod
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
"""
Reads the following files:
* prefix.pkl
Loads the file:
* prefix.h5
from the folder given by foldername. prefix is configurable, and is
'model' by default
Output is an entire instance of the fitted model that was saved
Output is the `model_` attribute of the MLPTimeseriesRegressor class.
Overwrites the abstract method from BaseTimeseriesRegressor
Returns
-------
Keras model
"""
import cloudpickle
from tensorflow import keras
import keras

foldername = Path(foldername)
with open(foldername / (prefix + ".pkl"), "rb") as f:
obj = cloudpickle.load(f)

loss = obj._get_loss()
obj.model_ = keras.models.load_model(
return keras.models.load_model(
foldername / (prefix + ".h5"), custom_objects={"mse_tilted": loss}
)
return obj

def _get_loss(self) -> Union[str, Callable]:
"""
Expand Down
4 changes: 2 additions & 2 deletions sam/visualization/quantile_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def _interactive_quantile_plot(
if outlier_min_q is not None and outliers is None:
valid_low = y_hat[these_cols[col_order[n_quants - 1 - (outlier_min_q - 1)]]]
valid_high = y_hat[these_cols[col_order[n_quants + (outlier_min_q - 1)]]]
outliers = (y_true > valid_high) | (y_true < valid_low)
outliers = (y_true >= valid_high) | (y_true <= valid_low)
outliers = outliers.astype(int)
k = np.ones(outlier_window)
outliers = (
Expand Down Expand Up @@ -379,7 +379,7 @@ def _static_quantile_plot(
if outlier_min_q is not None and outliers is None:
valid_low = y_hat[these_cols[col_order[n_quants - 1 - (outlier_min_q - 1)]]]
valid_high = y_hat[these_cols[col_order[n_quants + (outlier_min_q - 1)]]]
outliers = (y_true > valid_high) | (y_true < valid_low)
outliers = (y_true >= valid_high) | (y_true <= valid_low)
outliers = outliers.astype(int)
k = np.ones(outlier_window)
outliers = (
Expand Down

0 comments on commit 0126b73

Please sign in to comment.