Reformatting save and dump functions. #87

Merged · 11 commits · Nov 14, 2024
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,13 @@ Version X.Y.Z stands for:

-------------

## Version 4.0.0

### Changes
- The `dump` and `load` functions are now inherited from the BaseTimeseriesRegressor.
- Added abstract functions `dump_parameters` and `load_parameters` for dumping and loading model files.
- Implemented `dump_parameters` and `load_parameters` for models.
- Values equal to the outer quantile boundaries in the `_interactive_quantile_plot` and `_static_quantile_plot` functions are now flagged as outliers (the comparison changed from strict to inclusive).
## Version 3.2.1

### Changes
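The changelog entries above describe the new save/load contract: `dump` and `load` now live on `BaseTimeseriesRegressor`, while each model only supplies `dump_parameters` and `load_parameters`. A minimal usage sketch of that flow; the `sam.models` import path, the default constructor, the `fit(X, y)` call, and the toy data are assumptions for illustration, not taken from this diff:

```python
from pathlib import Path

import pandas as pd
from sam.models import ConstantTimeseriesRegressor  # import path assumed

# Hypothetical toy data; real usage depends on the model's feature engineering.
X = pd.DataFrame({"x": range(100)}, index=pd.date_range("2024-01-01", periods=100, freq="h"))
y = pd.Series(range(100), dtype=float, index=X.index)

model = ConstantTimeseriesRegressor()  # constructor arguments assumed
model.fit(X, y)

# dump() is inherited from BaseTimeseriesRegressor: it pickles the estimator and
# delegates the fitted model_ to the subclass's dump_parameters(). The base
# implementation does not create the folder, so make sure it exists first.
folder = Path("saved_model")
folder.mkdir(exist_ok=True)
model.dump(str(folder), prefix="model")

# load() unpickles the estimator and reattaches model_ via load_parameters().
restored = ConstantTimeseriesRegressor.load(str(folder), prefix="model")
```

Because `load` simply returns the unpickled object, the restored estimator keeps the concrete class it was saved with, regardless of which subclass `load` is called on.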
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -22,7 +22,7 @@ packages = [

[project]
name = "sam"
version = "3.2.1"
version = "4.0.0"
description = "Time series anomaly detection and forecasting"
readme = "README.md"
requires-python = ">=3.9"
65 changes: 57 additions & 8 deletions sam/models/base_model.py
@@ -1,7 +1,8 @@
import warnings
from abc import ABC, abstractmethod
from operator import itemgetter
from typing import Callable, List, Sequence, Tuple, Union
from pathlib import Path
from typing import Callable, List, Sequence, Tuple, Union, Any

import numpy as np
import pandas as pd
@@ -643,11 +644,12 @@ def score(self, X: pd.DataFrame, y: pd.Series) -> float:
return score

@abstractmethod
def dump(self, foldername: str, prefix: str = "model") -> None:
"""Save a model to disk
def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
"""
Save model parameters to disk

This abstract method needs to be implemented by any class inheriting from
BaseTimeseriesRegressor. This function dumps the SAM model to disk.
BaseTimeseriesRegressor. This function dumps the SAM model parameters to disk.

Parameters
----------
@@ -656,11 +658,50 @@ def dump(self, foldername: str, prefix: str = "model") -> None:
prefix : str, optional
The prefix used in the filename, by default "model"
"""
return None
...

@classmethod
def dump(self, foldername: str, prefix: str = "model"):
"""
Writes the following files:
* prefix.pkl
* prefix.h5

to the folder given by foldername. prefix is configurable, and is
'model' by default

Overwrites the abstract method from BaseTimeseriesRegressor

Parameters
----------
foldername: str
The name of the folder to save the model
prefix: str, optional (Default='model')
The name of the model
"""
# If the estimator is fitted, its parameters are dumped and model_ is detached before pickling
import cloudpickle

backup = None
if hasattr(self, "model_"):
check_is_fitted(self, "model_")
self.dump_parameters(foldername=foldername, prefix=prefix)
# Temporarily detach model_, because it may not be picklable (e.g. a Keras model)
backup, self.model_ = self.model_, None

foldername = Path(foldername)

with open(foldername / (prefix + ".pkl"), "wb") as f:
cloudpickle.dump(self, f)

if backup is not None:
self.model_ = backup

@staticmethod
@abstractmethod
def load(cls, foldername, prefix="model"):
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any: ...

@classmethod
def load(cls, foldername: str, prefix: str = "model"):
"""Load a model from disk

This abstract method needs to be implemented by any class inheriting from
@@ -677,4 +718,12 @@ def load(cls, foldername, prefix="model"):
-------
The SAM model that has been loaded from disk
"""
return None
import cloudpickle

with open(Path(foldername) / (prefix + ".pkl"), "rb") as f:
obj = cloudpickle.load(f)

model = obj.load_parameters(obj, foldername=foldername, prefix=prefix)
if model is not None:
obj.model_ = model
return obj
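With `dump` and `load` concrete on the base class, a new model only has to (de)serialize its fitted `model_` attribute. A hypothetical minimal subclass showing just those two hooks; the class name is invented, the remaining abstract methods (such as `fit` and `predict`) are omitted, and the import path is assumed:

```python
from pathlib import Path
from typing import Any

import cloudpickle

from sam.models.base_model import BaseTimeseriesRegressor  # import path assumed


class MyTimeseriesRegressor(BaseTimeseriesRegressor):
    # NOTE: fit, predict and the other abstract methods are omitted in this sketch.

    def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
        # Persist only the fitted model_; the base dump() pickles everything else.
        with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f:
            cloudpickle.dump(self.model_, f)

    @staticmethod
    def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
        # Called by the base load() with the unpickled estimator as `obj`;
        # the returned value is assigned back to obj.model_.
        with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f:
            return cloudpickle.load(f)
```

This is exactly the pattern the constant and Lasso models follow below; the MLP model differs only in writing a Keras `.h5` file instead of a pickle.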
49 changes: 8 additions & 41 deletions sam/models/constant_model.py
@@ -288,49 +288,16 @@ def predict(
else:
return prediction

def dump(self, foldername: str, prefix: str = "model") -> None:
"""
Writes the instanced model to foldername/prefix.pkl

prefix is configurable, and is 'model' by default

Overwrites the abstract method from SamQuantileRegressor

Parameters
----------
foldername: str
The name of the folder to save the model
prefix: str, optional (Default='model')
The name of the model
"""
# This function only works if the estimator is fitted
check_is_fitted(self, "model_")

def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
import cloudpickle

foldername = Path(foldername)

with open(foldername / (prefix + ".pkl"), "wb") as f:
cloudpickle.dump(self, f)
with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f:
cloudpickle.dump(self.model_, f)

@classmethod
def load(cls, foldername, prefix="model") -> Callable:
"""
Reads and loads the model located at foldername/prefix.pkl

prefix is configurable, and is 'model' by default
Output is an entire instance of the fitted model that was saved

Overwrites the abstract method from SamQuantileRegressor

Returns
-------
A fitted ConstantTimeseriesRegressor object
"""
@staticmethod
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
import cloudpickle

foldername = Path(foldername)
with open(foldername / (prefix + ".pkl"), "rb") as f:
obj = cloudpickle.load(f)

return obj
with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f:
model = cloudpickle.load(f)
return model
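For the constant model, `dump` therefore leaves two files behind: `<prefix>.pkl` (the estimator pickled with `model_` detached) and `<prefix>_params.pkl` (the fitted `model_` written above). A standalone sketch of that two-file convention, using plain dictionaries as stand-ins for the estimator and its fitted model:

```python
from pathlib import Path

import cloudpickle

folder = Path("saved_model")
folder.mkdir(exist_ok=True)
prefix = "model"

estimator = {"quantiles": (0.25, 0.75), "model_": None}  # stand-in for the pickled estimator
fitted_model = {"median_": 3.2}                          # stand-in for model_

with open(folder / f"{prefix}.pkl", "wb") as f:          # written by dump()
    cloudpickle.dump(estimator, f)
with open(folder / f"{prefix}_params.pkl", "wb") as f:   # written by dump_parameters()
    cloudpickle.dump(fitted_model, f)

# Loading mirrors BaseTimeseriesRegressor.load(): unpickle the estimator,
# then reattach the parameters returned by load_parameters().
with open(folder / f"{prefix}.pkl", "rb") as f:
    restored = cloudpickle.load(f)
with open(folder / f"{prefix}_params.pkl", "rb") as f:
    restored["model_"] = cloudpickle.load(f)
```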
57 changes: 15 additions & 42 deletions sam/models/lasso_model.py
@@ -1,5 +1,5 @@
import os
from typing import Callable, Sequence, Tuple, Union
from pathlib import Path
from typing import Callable, Sequence, Tuple, Union, Any

import numpy as np
import pandas as pd
@@ -191,43 +191,16 @@ def predict(
else:
return prediction

def dump(self, foldername: str, prefix: str = "model") -> None:
"""Save a model to disk

This abstract method needs to be implemented by any class inheriting from
SamQuantileRegressor. This function dumps the SAM model to disk.

Parameters
----------
foldername : str
The folder location where to save the model
prefix : str, optional
The prefix used in the filename, by default "model"
"""
import joblib

if not os.path.exists(foldername):
os.makedirs(foldername)
joblib.dump(self, os.path.join(foldername, f"{prefix}.pkl"))

@classmethod
def load(cls, foldername, prefix="model") -> Callable:
"""Load a model from disk

This abstract method needs to be implemented by any class inheriting from
SamQuantileRegressor. This function loads a SAM model from disk.

Parameters
----------
foldername : str
The folder location where the model is stored
prefix : str, optional
The prefix used in the filename, by default "model"

Returns
-------
The SAM model that has been loaded from disk
"""
import joblib

return joblib.load(os.path.join(foldername, f"{prefix}.pkl"))
def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
import cloudpickle

with open(Path(foldername) / f"{prefix}_params.pkl", "wb") as f:
cloudpickle.dump(self.model_, f)

@staticmethod
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
import cloudpickle

with open(Path(foldername) / f"{prefix}_params.pkl", "rb") as f:
model = cloudpickle.load(f)
return model
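The Lasso model previously dumped the whole fitted estimator with `joblib`; it now writes only `model_` with `cloudpickle`, in line with the other models. One practical difference between the backends: `cloudpickle` can serialize lambdas and locally defined functions, which the standard pickle protocol used under the hood by `joblib` rejects. A small illustration of that difference; the lambda-carrying pipeline is an invented example, not code from this repository:

```python
import pickle

import cloudpickle
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# A pipeline holding a lambda: the standard pickle refuses it, cloudpickle handles it.
pipe = Pipeline([("shift", FunctionTransformer(lambda x: x + 1))])

try:
    pickle.dumps(pipe)
except (pickle.PicklingError, AttributeError) as err:
    print(f"pickle failed: {err}")

blob = cloudpickle.dumps(pipe)  # succeeds
restored = cloudpickle.loads(blob)
```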
42 changes: 8 additions & 34 deletions sam/models/mlp_model.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Callable, Sequence, Tuple, Union, Optional
from typing import Callable, Sequence, Tuple, Union, Optional, Any

import numpy as np
import pandas as pd
@@ -343,10 +343,9 @@ def predict(
else:
return prediction

def dump(self, foldername: Union[str, Path], prefix: str = "model") -> None:
def dump_parameters(self, foldername: str, prefix: str = "model") -> None:
"""
Writes the following files:
* prefix.pkl
* prefix.h5

to the folder given by foldername. prefix is configurable, and is
@@ -361,54 +360,29 @@ def dump(self, foldername: Union[str, Path], prefix: str = "model") -> None:
prefix: str, optional (Default='model')
The name of the model
"""
# This function only works if the estimator is fitted
check_is_fitted(self, "model_")

import cloudpickle

foldername = Path(foldername)

# TEMPORARY
self.model_.save(foldername / (prefix + ".h5"))

# Set the models to None temporarily, because they can't be pickled
backup, self.model_ = self.model_, None

with open(foldername / (prefix + ".pkl"), "wb") as f:
cloudpickle.dump(self, f)

# Set it back
self.model_ = backup

@classmethod
def load(cls, foldername: Union[str, Path], prefix="model"):
@staticmethod
def load_parameters(obj, foldername: str, prefix: str = "model") -> Any:
"""
Reads the following files:
* prefix.pkl
Loads the file:
* prefix.h5

from the folder given by foldername. prefix is configurable, and is
'model' by default
Output is an entire instance of the fitted model that was saved
Output is the `model_` attribute of the MLPTimeseriesRegressor class.

Implements the abstract method from BaseTimeseriesRegressor

Returns
-------
Keras model
"""
import cloudpickle
from tensorflow import keras
import keras

foldername = Path(foldername)
with open(foldername / (prefix + ".pkl"), "rb") as f:
obj = cloudpickle.load(f)

loss = obj._get_loss()
obj.model_ = keras.models.load_model(
return keras.models.load_model(
foldername / (prefix + ".h5"), custom_objects={"mse_tilted": loss}
)
return obj

def _get_loss(self) -> Union[str, Callable]:
"""
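`load_parameters` for the MLP model reloads the network from `prefix.h5` and must pass the quantile loss through `custom_objects`, because Keras cannot deserialize a custom loss function by name alone. A standalone sketch of that mechanism with a trivial custom loss, assuming Keras 3 (in line with the `import keras` change above); the tiny network and the loss name are invented for illustration:

```python
import numpy as np
import keras


def my_custom_loss(y_true, y_pred):
    # Stand-in for the tilted quantile loss that _get_loss() builds.
    return keras.ops.mean(keras.ops.square(y_true - y_pred), axis=-1)


# A trivial network, saved to HDF5 the same way dump_parameters() does.
model = keras.Sequential([keras.layers.Input(shape=(3,)), keras.layers.Dense(1)])
model.compile(optimizer="adam", loss=my_custom_loss)
model.fit(np.random.rand(8, 3), np.random.rand(8, 1), epochs=1, verbose=0)
model.save("model.h5")

# Loading cannot resolve the custom loss unless it is supplied explicitly;
# this mirrors custom_objects={"mse_tilted": loss} in load_parameters().
restored = keras.models.load_model(
    "model.h5", custom_objects={"my_custom_loss": my_custom_loss}
)
```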
4 changes: 2 additions & 2 deletions sam/visualization/quantile_plot.py
@@ -271,7 +271,7 @@ def _interactive_quantile_plot(
if outlier_min_q is not None and outliers is None:
valid_low = y_hat[these_cols[col_order[n_quants - 1 - (outlier_min_q - 1)]]]
valid_high = y_hat[these_cols[col_order[n_quants + (outlier_min_q - 1)]]]
outliers = (y_true > valid_high) | (y_true < valid_low)
outliers = (y_true >= valid_high) | (y_true <= valid_low)
outliers = outliers.astype(int)
k = np.ones(outlier_window)
outliers = (
@@ -379,7 +379,7 @@ def _static_quantile_plot(
if outlier_min_q is not None and outliers is None:
valid_low = y_hat[these_cols[col_order[n_quants - 1 - (outlier_min_q - 1)]]]
valid_high = y_hat[these_cols[col_order[n_quants + (outlier_min_q - 1)]]]
outliers = (y_true > valid_high) | (y_true < valid_low)
outliers = (y_true >= valid_high) | (y_true <= valid_low)
outliers = outliers.astype(int)
k = np.ones(outlier_window)
outliers = (
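The only behavioral change in this file is that a point lying exactly on the outermost predicted quantile now counts as an outlier (inclusive `>=`/`<=` instead of strict `>`/`<`), matching the changelog entry. A small self-contained check of the boundary behavior with made-up values:

```python
import pandas as pd

y_true = pd.Series([1.0, 5.0, 10.0, 12.0])
valid_low = pd.Series(2.0, index=y_true.index)    # lowest quantile prediction
valid_high = pd.Series(10.0, index=y_true.index)  # highest quantile prediction

old_outliers = (y_true > valid_high) | (y_true < valid_low)    # strict comparison
new_outliers = (y_true >= valid_high) | (y_true <= valid_low)  # inclusive comparison

print(old_outliers.astype(int).tolist())  # [1, 0, 0, 1]
print(new_outliers.astype(int).tolist())  # [1, 0, 1, 1] -- the boundary point is now flagged
```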