From 8115979f84b50ffa4b208a0749548fe015b839c5 Mon Sep 17 00:00:00 2001 From: alexmindset Date: Thu, 28 Jul 2022 21:06:01 +0300 Subject: [PATCH 1/5] Adding README.md codecov badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6f6b6e0..163f084 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![PyPI](https://img.shields.io/pypi/v/insolver?style=flat)](https://pypi.org/project/insolver/) [![Documentation Status](https://readthedocs.org/projects/insolver/badge/?version=latest)](https://insolver.readthedocs.io/en/latest/?badge=latest) [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/MindSetLib/Insolver/Insolver%20testing?logo=github&label=tests)](https://github.com/MindSetLib/Insolver/actions) +[![Coverage](https://codecov.io/github/MindSetLib/Insolver/coverage.svg?branch=master)](https://codecov.io/github/MindSetLib/Insolver) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Downloads](https://pepy.tech/badge/insolver/week)](https://pepy.tech/project/insolver) From 16866b21ef4a1f968f672079ab30264510c8bc34 Mon Sep 17 00:00:00 2001 From: alexmindset Date: Thu, 4 Aug 2022 00:47:12 +0300 Subject: [PATCH 2/5] Maintenance: Fixing requirements; Development: New version of InsolverGLMWrapper --- insolver/transforms/core.py | 10 +- insolver/utils.py | 6 +- insolver/wrappers_v2/__init__.py | 3 + insolver/wrappers_v2/base.py | 129 +++++ insolver/wrappers_v2/glm.py | 453 ++++++++++++++++++ insolver/wrappers_v2/utils/__init__.py | 3 + insolver/wrappers_v2/utils/h2o_utils.py | 120 +++++ insolver/wrappers_v2/utils/save_load_utils.py | 89 ++++ requirements.txt | 21 +- tests/test_wrappers_v2_InsolverBaseWrapper.py | 51 ++ 10 files changed, 870 insertions(+), 15 deletions(-) create mode 100644 insolver/wrappers_v2/__init__.py create mode 100644 insolver/wrappers_v2/base.py create mode 100644 insolver/wrappers_v2/glm.py create mode 100644 
insolver/wrappers_v2/utils/__init__.py create mode 100644 insolver/wrappers_v2/utils/h2o_utils.py create mode 100644 insolver/wrappers_v2/utils/save_load_utils.py create mode 100644 tests/test_wrappers_v2_InsolverBaseWrapper.py diff --git a/insolver/transforms/core.py b/insolver/transforms/core.py index 1bc6c17..8ffb474 100644 --- a/insolver/transforms/core.py +++ b/insolver/transforms/core.py @@ -41,8 +41,8 @@ def __init__(self, data: Any, transforms: Union[List, Dict[str, Union[List, Dict if isinstance(transforms, list): self.transforms = transforms elif isinstance(transforms, dict) and _check_transforms(transforms): - for key in transforms.keys(): - setattr(self, key, transforms[key]) + for key, value in transforms.items(): + setattr(self, key, value) self.transforms_done: Dict = dict() @@ -67,9 +67,9 @@ def _check_colnames_dtypes(expected: Dict[str, dtype], input_: Dict[str, dtype], expected = {key: expected[key] for key in common_cols} if expected != input_: - for key in expected.keys(): - if expected[key] != input_[key]: - message = f"{key}: input {input_[key]}, expected {expected[key]}" + for key, value in expected.items(): + if value != input_[key]: + message = f"{key}: input {input_[key]}, expected {value}" warn_insolver(f'{step.capitalize()} column dtype mismatch: Column {message}!', TransformsWarning) def ins_transform(self) -> Dict: diff --git a/insolver/utils.py b/insolver/utils.py index d749bd3..eb8d8be 100644 --- a/insolver/utils.py +++ b/insolver/utils.py @@ -1,9 +1,11 @@ import warnings from typing import Type, Union, Optional -try: +import sys + +if sys.version_info >= (3, 8): from typing import Literal -except ImportError: +else: from typing_extensions import Literal diff --git a/insolver/wrappers_v2/__init__.py b/insolver/wrappers_v2/__init__.py new file mode 100644 index 0000000..dba07c9 --- /dev/null +++ b/insolver/wrappers_v2/__init__.py @@ -0,0 +1,3 @@ +from insolver.wrappers_v2.utils import load_model +from .base import InsolverBaseWrapper 
+from .glm import InsolverGLMWrapper diff --git a/insolver/wrappers_v2/base.py b/insolver/wrappers_v2/base.py new file mode 100644 index 0000000..f1f7159 --- /dev/null +++ b/insolver/wrappers_v2/base.py @@ -0,0 +1,129 @@ +import os +import time +import json +from io import BytesIO +from os import PathLike +from copy import deepcopy +from zipfile import ZipFile, ZIP_DEFLATED +from typing import Union, Any, Dict, Callable + +import sys + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + + +class InsolverWrapperWarning(Warning): + def __init__(self, message: str) -> None: + self.message = message + + def __str__(self) -> str: + return repr(self.message) + + +class InsolverBaseWrapper: + """Base wrapper serving as a building block for other wrappers.""" + + model: Any = None + metadata: Dict[str, Any] = dict() + backend: str = '' + task: str = '' + algo: str = '' + _backend_saving_methods: Dict[str, Dict[str, Callable]] = dict() + _model_cached: Any = None + + def __call__(self) -> Any: + return self.model + + def _get_init_args(self, vars_: Any) -> None: + copy_vars = deepcopy(vars_) + copy_vars.pop("self") + self.metadata = {"init_params": copy_vars, 'is_fitted': False, 'algo': self.__class__.algo} + + def _update_metadata(self) -> None: + _metadata = self.__dict__.copy() + _metadata = { + key: _metadata[key] for key in _metadata if not (key in ['model', 'metadata'] or key.startswith('_')) + } + self.metadata.update(_metadata) + + def _save_insolver(self, path_or_buf: Union[str, PathLike[str]], method: Callable, **kwargs: Any) -> None: + buffer = BytesIO() + with ZipFile(buffer, mode="w", compression=ZIP_DEFLATED) as zip_file: + zip_file.writestr("metadata.json", json.dumps(self.metadata)) + zip_file.writestr( + f"model_{os.path.basename(path_or_buf)}", + BytesIO(method(self.model, path_or_buf=None, **kwargs)).getvalue(), + ) + + with open(path_or_buf if str(path_or_buf).endswith('.zip') else 
f'{path_or_buf}.zip', "wb") as f: + f.write(buffer.getvalue()) + + def save_model( + self, + path_or_buf: Union[None, str, PathLike[str]] = None, + mode: Literal['insolver', 'raw'] = "insolver", + method: str = '', + **kwargs: Any, + ) -> Union[str, bytes]: + """Saving the model contained in wrapper. + + Args: + path_or_buf (str, os.PathLike[str]): Filepath or buffer object. If None, the result is returned as a string. + mode (str, optional): Saving mode, values ['insolver', 'raw'] are supported. Option 'raw' saves fitted model + without additional metadata. Option 'insolver' saves model as a zip-file with model and json with metadata + inside. + method (str, optional): Saving method. + **kwargs: Other parameters passed to, e.g. h2o.save_model(). + """ + _modes = ["insolver", "raw"] + + if mode not in _modes: + raise ValueError(f"Invalid mode argument {mode}. Mode must one of {_modes}") + + if method == '' and len(self._backend_saving_methods[self.backend].keys()) > 0: + method = list(self._backend_saving_methods[self.backend].keys())[0] + elif method not in self._backend_saving_methods[self.backend].keys(): + raise ValueError( + f'Invalid method "{method}". ' + f'Supported values for "{self.backend}" backend are ' + f'{list(self._backend_saving_methods[self.backend].keys())}.' + ) + + if not self.metadata['is_fitted']: + raise ValueError("No fitted model found. 
Fit model first.") + + if (path_or_buf is not None) and isinstance(path_or_buf, str): + path_or_buf = os.path.abspath(path_or_buf) + if os.path.isdir(path_or_buf): + default_name = ( + f"{'insolver' if mode == 'insolver' else method}" + f"_{self.algo}_{self.backend}_{self.task}_{round(time.time() * 1000)}" + ) + path_or_buf = os.path.normpath(os.path.join(path_or_buf, default_name)) + + if path_or_buf is None: + if self._model_cached is None: + return self._backend_saving_methods[self.backend][method](self.model, path_or_buf, **kwargs) + else: + return self._model_cached + else: + if mode == "insolver": + self.metadata.update({"saving_method": method}) + if self._model_cached is None: + self._save_insolver( + path_or_buf, method=self._backend_saving_methods[self.backend][method], **kwargs + ) + else: + self._save_insolver( + path_or_buf, + method=self._backend_saving_methods[self.backend][method], + _model_cached=self._model_cached, + **kwargs, + ) + path_or_buf = f'{path_or_buf}.zip' + else: + self._backend_saving_methods[self.backend][method](self.model, path_or_buf, **kwargs) + return f"Saved model: {os.path.normpath(path_or_buf)}" diff --git a/insolver/wrappers_v2/glm.py b/insolver/wrappers_v2/glm.py new file mode 100644 index 0000000..e5a8786 --- /dev/null +++ b/insolver/wrappers_v2/glm.py @@ -0,0 +1,453 @@ +import sys + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +from os import PathLike +from typing import Optional, Dict, Any, Union, List, Tuple, Callable + +from sklearn.base import BaseEstimator +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.linear_model import PoissonRegressor, GammaRegressor, TweedieRegressor, LogisticRegression, ElasticNet + +from h2o.frame import H2OFrame +from h2o.estimators.glm import H2OGeneralizedLinearEstimator + +from numpy import repeat, ndarray, insert, sum as npsum, sqrt, exp, true_divide, hstack, ones +from 
pandas import DataFrame, Series, concat + +from ..utils import warn_insolver +from .base import InsolverBaseWrapper, InsolverWrapperWarning +from .utils import save_pickle, save_dill, save_h2o +from .utils.h2o_utils import x_y_to_h2o_frame, h2o_start, h2o_stop, to_h2oframe, load_h2o + + +class InsolverGLMWrapper(InsolverBaseWrapper): + algo = 'glm' + _backends = ["h2o", "sklearn"] + _tasks = ["class", "reg"] + _backend_saving_methods = {'sklearn': {'pickle': save_pickle, 'dill': save_dill}, 'h2o': {'h2o': save_h2o}} + + """Insolver wrapper for Generalized Linear Models. + + Parameters: + backend (str): Framework for building GLM, currently 'h2o' and 'sklearn' are supported. + task (str): Task that GLM should solve: Classification or Regression. Values 'reg' and 'class' are supported. + family (str, float, int, optional): Distribution for GLM. Supports any family from h2o as + str. For sklearn supported `str` families are ['gaussian', 'normal', 'poisson', 'gamma', 'inverse_gaussian'], + also may be defined as `int` or `float` as a power for Tweedie GLM. By default, Gaussian GLM is fitted. + link (str, optional): Link function for GLM. If `None`, sets to default value for both h2o and sklearn. + h2o_init_params (dict, optional): Parameters passed to `h2o.init()`, when `backend` == 'h2o'. + **kwargs: Parameters for GLM estimators (for H2OGeneralizedLinearEstimator or TweedieRegressor) except + `family` (`power` for TweedieRegressor) and `link`. + + """ + + def __init__( + self, + backend: Optional[Literal['sklearn', 'h2o']], + task: Optional[Literal['class', 'reg']] = 'reg', + family: Optional[str] = None, + link: Optional[str] = None, + h2o_server_params: Optional[Dict] = None, + **kwargs: Any, + ): + self._get_init_args(vars()) + + # Checks on supported backends and tasks + if backend not in self._backends: + raise ValueError(f'Invalid "{backend}" backend argument. 
Supported backends: {self._backends}.') + if task not in self._tasks: + raise ValueError(f'Invalid "{task}" task argument. Supported tasks: {self._tasks}.') + + self.backend = backend + self.task = task + self.family = family + self.link = link + self.h2o_server_params = h2o_server_params + self.kwargs = kwargs + self.model = self.init_model() + self.__dict__.update(self.metadata) + + def _init_glm_sklearn(self, **params: Any) -> BaseEstimator: + model = BaseEstimator() # Just to mitigate referenced before assignment warning + + # Checks on supported families vs tasks + if self.family not in [None, 'poisson', 'gamma', 'tweedie', 'normal', 'gaussian', 'inverse_gaussian', 'logit']: + ValueError(f'Distribution family "{self.family}" is not supported with sklearn backend.') + else: + if (self.family in ['logit']) and (self.task == 'reg'): + ValueError(f'Distribution family "{self.family}" does not match the task "{self.task}".') + if (self.family not in [None, 'logit']) and (self.task == 'class'): + ValueError(f'Distribution family "{self.family}" does not match the task "{self.task}".') + if self.family is None: + self.family = 'gaussian' if self.task == 'reg' else 'logit' + + # Checks on supported families vs links + if self.family in ['gamma', 'poisson']: + self.link = 'log' if self.link is None else self.link + if self.link != 'log': + warn_insolver( + f'Link function "{self.link}" not supported for "{self.family}",using default "log" link', + InsolverWrapperWarning, + ) + if self.family in ['tweedie', 'inverse_gaussian']: + self.link = 'log' if self.link is None else self.link + if self.link not in ['log', 'identity']: + warn_insolver( + f'Link function "{self.link}" not supported for "{self.family}",using default "log" link', + InsolverWrapperWarning, + ) + if self.family in ['normal', 'gaussian']: + self.link = 'identity' if self.link is None else self.link + if self.link != 'identity': + warn_insolver( + f'Link function "{self.link}" not supported for 
"{self.family}",using default "identity" link', + InsolverWrapperWarning, + ) + if self.family in ['normal', 'gaussian']: + self.link = 'identity' if self.link is None else self.link + if self.link != 'identity': + warn_insolver( + f'Link function "{self.link}" not supported for "{self.family}",using default "identity" link', + InsolverWrapperWarning, + ) + if self.family == 'logit': + self.link = 'logit' if self.link is None else self.link + if self.link != 'logit': + warn_insolver( + f'Link function "{self.link}" not supported for "{self.family}",using default "logit" link', + InsolverWrapperWarning, + ) + + # Estimator initialization + if self.family == 'poisson': + # alpha=1.0, fit_intercept=True, max_iter=100, tol=0.0001, warm_start=False, verbose=0 + model = PoissonRegressor(**params) + if self.family == 'gamma': + # alpha=1.0, fit_intercept=True, max_iter=100, tol=0.0001, warm_start=False, verbose=0 + model = GammaRegressor(**params) + if self.family == 'tweedie': + # power=0.0, alpha=1.0, fit_intercept=True, link='auto', max_iter=100, tol=0.0001, + # warm_start=False, verbose=0 + model = TweedieRegressor(**params) + if self.family == 'inverse_gaussian': + # alpha=1.0, fit_intercept=True, max_iter=100, tol=0.0001, warm_start=False, verbose=0 + model = TweedieRegressor(power=3, **params) + if self.family in ['normal', 'gaussian']: + # alpha=1.0, l1_ratio=0.5, fit_intercept=True, normalize='deprecated', precompute=False, + # max_iter=1000, copy_X=True, tol=0.0001, warm_start=False, positive=False, random_state=None, + # selection='cyclic' + model = ElasticNet(**params) + if self.family == 'logit': + # penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, + # random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', verbose=0, warm_start=False, + # n_jobs=None, l1_ratio=None + model = LogisticRegression(**params) + + if self.family in ['poisson', 'gamma', 'tweedie', 'inverse_gaussian']: + model = 
Pipeline([('scaler', StandardScaler(with_mean=True, with_std=True)), ('glm', model)]) + self.metadata.update({'is_standardized': True}) + else: + self.metadata.update({'is_standardized': False}) + + return model + + def _init_glm_h2o(self, **params: Any) -> H2OGeneralizedLinearEstimator: + model = H2OGeneralizedLinearEstimator(family=self.family, link=self.link, **params) + return model + + def init_model(self) -> Any: + model = None + if self.backend == 'sklearn': + params = self.metadata['init_params']['kwargs'] + # params.update(params.pop('kwargs')) + model = self._init_glm_sklearn(**params) + if self.backend == 'h2o': + params = { + key: self.metadata['init_params'][key] + for key in self.metadata['init_params'] + if key not in ['family', 'link', 'backend', 'task', 'h2o_server_params', 'kwargs'] + } + params.update(self.metadata['init_params']['kwargs']) + model = self._init_glm_h2o(**params) + self._update_metadata() + return model + + def fit( + self, + x: Union[DataFrame, Series], + y: Union[DataFrame, Series], + sample_weight: Union[None, DataFrame, Series] = None, + x_valid: Union[None, DataFrame, Series] = None, + y_valid: Union[None, DataFrame, Series] = None, + sample_weight_valid: Union[None, DataFrame, Series] = None, + report: Union[None, List, Tuple, Callable] = None, + **kwargs: Any, + ) -> None: + """Fit a Generalized Linear Model. + + Args: + x (pd.DataFrame, pd.Series): Training data. + y (pd.DataFrame, pd.Series): Training target values. + sample_weight (pd.DataFrame, pd.Series, optional): Training sample weights. + x_valid (pd.DataFrame, pd.Series, optional): Validation data (only h2o supported). + y_valid (pd.DataFrame, pd.Series, optional): Validation target values (only h2o supported). + sample_weight_valid (pd.DataFrame, pd.Series, optional): Validation sample weights. + report (list, tuple, optional): A list of metrics to report after model fitting, optional. + **kwargs: Other parameters passed to H2OGeneralizedLinearEstimator. 
+ """ + for arg in [x, y, sample_weight, x_valid, y_valid, sample_weight_valid]: + if (arg is not None) and (not isinstance(arg, (DataFrame, Series))): + argname = [k for k, v in locals().items() if v == arg][0] + raise TypeError( + f'Invalid type {type(arg)} for "{argname}". It must be either pd.DataFrame or pd.Series.' + ) + + for y_var in [y, y_valid]: + if isinstance(y_var, DataFrame) and y_var.shape[1] > 1: + argname = [k for k, v in locals().items() if v == y_var][0] + raise ValueError(f'Argument "{argname}" must be a one-dimensional DataFrame.') + + features = list(x.columns) if isinstance(x, DataFrame) else [x.name] + target = list(y.columns) if isinstance(y, DataFrame) else y.name + self.metadata.update({'feature_names': features, 'target': target}) + prediction = None + + if self.backend == 'sklearn': + if any(arg is not None for arg in [x_valid, y_valid, sample_weight_valid]): + warn_insolver( + 'Arguments x_valid, y_valid, sample_weight_valid are not supported by sklearn backend', + InsolverWrapperWarning, + ) + if self.metadata['is_standardized']: + self.model.fit(x, y, glm__sample_weight=sample_weight) + else: + self.model.fit(x, y, sample_weight=sample_weight) + self.metadata.update({'is_fitted': True}) + self.metadata.update({'coefs': self.coef()}) + if isinstance(report, (list, tuple)) or callable(report): + prediction = self.model.predict(x) + if self.backend == 'h2o': + h2o_start() + train_set, params = x_y_to_h2o_frame(x, y, sample_weight, {**kwargs}, x_valid, y_valid, sample_weight_valid) + self.model.train(y=target, x=features, training_frame=train_set, **params) + self.metadata.update({'is_fitted': True}) + self.metadata.update({'coefs': self.coef()}) + if isinstance(report, (list, tuple)) or callable(report): + prediction = self.model.predict(train_set).as_data_frame().values.reshape(-1) + self._model_cached = self.save_model() + h2o_stop() + + if prediction is not None: + if not callable(report) and (report is not None): + print( + 
DataFrame([[x.__name__, x(y, prediction)] for x in report], columns=['Metrics', 'Value']).set_index( + 'Metrics' + ) + ) + if callable(report) and (report is not None): + print( + DataFrame([[report.__name__, report(y, prediction)]], columns=['Metrics', 'Value']).set_index( + 'Metrics' + ) + ) + + def predict( + self, x: Union[DataFrame, Series], sample_weight: Union[None, DataFrame, Series] = None, **kwargs: Any + ) -> Optional[ndarray]: + """Predict using GLM with feature matrix X. + + Args: + x (pd.DataFrame, pd.Series): Samples. + sample_weight (pd.DataFrame, pd.Series, optional): Test sample weights. + **kwargs: Other parameters passed to H2OGeneralizedLinearEstimator.predict(). + + Returns: + array: Returns predicted values. + """ + if not self.metadata['is_fitted']: + raise ValueError("This instance is not fitted yet. Call '.fit(...)' before using this estimator.") + + if not isinstance(x, (DataFrame, Series)): + raise TypeError(f'Invalid type {type(x)} for "x". It must be either pd.DataFrame or pd.Series.') + + predictions = None + if self.backend == 'sklearn': + predictions = self.model.predict(x[self.metadata['feature_names']] if isinstance(x, DataFrame) else x) + + if self.backend == 'h2o': + if self._model_cached is not None: + load_h2o(self._model_cached, self.h2o_server_params, terminate=False) + if self.model.parms['offset_column']['actual_value'] is not None and sample_weight is None: + offset_name = self.model.parms['offset_column']['actual_value']['column_name'] + sample_weight = Series(repeat(1, len(x)), name=offset_name, index=x.index) + if sample_weight is not None: + x = concat([x, sample_weight], axis=1) + h2o_predict = x if isinstance(x, H2OFrame) else to_h2oframe(x) + predictions = self.model.predict(h2o_predict, **kwargs).as_data_frame().values.reshape(-1) + h2o_stop() + return predictions + + def predict_coef(self, x: Union[DataFrame, Series]) -> Optional[ndarray]: + """Predict using only GLM coefficients (without model itself) with 
feature matrix X. + + Args: + x (pd.DataFrame, pd.Series): Samples. + + Returns: + array: Returns predicted values. + """ + if (not self.metadata['is_fitted']) or ('coefs' not in self.metadata.keys()): + raise ValueError("This instance is not fitted yet. Call '.fit(...)' before using this estimator.") + + if not isinstance(x, (DataFrame, Series)): + raise TypeError(f'Invalid type {type(x)} for "x". It must be either pd.DataFrame or pd.Series.') + + def link_identity(lin_pred: ndarray) -> ndarray: + return lin_pred + + def link_log(lin_pred: ndarray) -> ndarray: + return exp(lin_pred) + + def link_inverse(lin_pred: ndarray) -> ndarray: + return true_divide(1, lin_pred) + + def link_logit(lin_pred: ndarray) -> ndarray: + return true_divide(exp(-lin_pred), 1 + exp(-lin_pred)) + + # def link_ologit(lin_pred): + # pass + # + # def link_tweedie(lin_pred): + # pass + + link_map = {'identity': link_identity, 'log': link_log, 'inverse': link_inverse, 'logit': link_logit} + + coefs = self.metadata['coefs'] + + if isinstance(x, DataFrame): + difference = set(coefs).difference(set(x.columns)) + elif isinstance(x, Series): + difference = set(coefs).difference({x.name}) + else: + difference = {'Intercept'} + difference.discard('Intercept') + if difference != set(): + raise KeyError(f'Input data missing columns: {difference}') + + coefs = Series(coefs) + x_ = x[coefs.index.drop('Intercept')] if isinstance(x, DataFrame) else x + x_ = hstack((ones((x_.shape[0], 1)), x_.values)) + linear_prediction = x_.dot(coefs.values) + if self.metadata['link'] in ['ologit', 'tweedie']: + raise NotImplementedError(f"Link function `{self.metadata['link']}` is not implemented.") + else: + return link_map[self.metadata['link']](linear_prediction).reshape(-1) + + def coef_norm(self) -> Optional[Dict[str, float]]: + """Output GLM coefficients for standardized data. + + Returns: + dict: {`str`: `float`} Dictionary containing GLM coefficients for standardized data. 
+ """ + if not self.metadata['is_fitted']: + raise ValueError("This instance is not fitted yet. Call '.fit(...)' before using this estimator.") + + coefs = None + if self.backend == 'sklearn': + if self.metadata['is_standardized']: + if self.metadata['feature_names'] is None: + features_ = [f'Feature_{i}' for i in range(len(self.model.named_steps['glm'].coef_))] + self.metadata['feature_names'] = features_ + else: + features_ = self.metadata['feature_names'] + + _zip = zip( + ['Intercept'] + features_, + insert(self.model.named_steps['glm'].coef_, 0, self.model.named_steps['glm'].intercept_), + ) + coefs = {x: y for x, y in _zip} + else: + raise NotImplementedError(f'Current method does not support {self.family} family.') + if self.backend == 'h2o': + coefs = self.model.coef_norm() + return coefs + + def coef(self) -> Optional[Dict[str, float]]: + """Output GLM coefficients for non-standardized data. Also calculated when GLM fitted on standardized data. + + Returns: + dict: {`str`: `float`} Dictionary containing GLM coefficients for non-standardized data. + """ + if not self.metadata['is_fitted']: + raise ValueError("This instance is not fitted yet. 
Call '.fit(...)' before using this estimator.") + + coefs = None + if self.backend == 'sklearn': + if self.metadata['feature_names'] is None: + if self.metadata['is_standardized']: + features_ = [f'Feature_{i}' for i in range(len(self.model.named_steps['glm'].coef_))] + else: + features_ = [f'Feature_{i}' for i in range(len(self.model.coef_))] + self.metadata['feature_names'] = features_ + else: + features_ = self.metadata['feature_names'] + + if self.metadata['is_standardized']: + _int = self.model.named_steps['glm'].intercept_ + _coef = self.model.named_steps['glm'].coef_ + _mean = self.model.named_steps['scaler'].mean_ + _var = self.model.named_steps['scaler'].var_ + intercept = _int - npsum(_coef * _mean / sqrt(_var)) + coefs_ = _coef / sqrt(_var) + else: + intercept = self.model.intercept_ + coefs_ = self.model.coef_ + + _zip = zip(['Intercept'] + features_, insert(coefs_, 0, intercept)) + coefs = {x: y for x, y in _zip} + if self.backend == 'h2o': + coefs = self.model.coef() + return coefs + + def coef_to_csv(self, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any) -> None: + """Write GLM coefficients to a comma-separated values (csv) file. + + Args: + path_or_buf : str or file handle, default None + File path or object, if None is provided the result is returned as + a string. If a non-binary file object is passed, it should be opened + with `newline=''`, disabling universal newlines. If a binary + file object is passed, `mode` might need to contain a `'b'`. + **kwargs: Other parameters passed to Pandas DataFrame.to_csv method. + Returns: + None or str + If path_or_buf is None, returns the resulting csv format as a + string. Otherwise, returns None. 
+ """ + result = DataFrame() + sources_methods = { + 'coefficients for standardized data': self.coef_norm, + 'coefficients for non-standardized data': self.coef, + } + + for name, method in sources_methods.items(): + try: + column = method() + + if isinstance(column, dict): + result = result.join(Series(column, name=name), how='outer') + except NotImplementedError: + pass + + if result.size > 0: + if path_or_buf is None: + return result.to_csv(path_or_buf, **kwargs) + else: + result.to_csv(path_or_buf, **kwargs) + else: + warn_insolver('No coefficients available!', InsolverWrapperWarning) diff --git a/insolver/wrappers_v2/utils/__init__.py b/insolver/wrappers_v2/utils/__init__.py new file mode 100644 index 0000000..41c4585 --- /dev/null +++ b/insolver/wrappers_v2/utils/__init__.py @@ -0,0 +1,3 @@ +from .save_load_utils import load_model +from .save_load_utils import save_pickle, save_dill +from .h2o_utils import save_h2o diff --git a/insolver/wrappers_v2/utils/h2o_utils.py b/insolver/wrappers_v2/utils/h2o_utils.py new file mode 100644 index 0000000..da9f2b1 --- /dev/null +++ b/insolver/wrappers_v2/utils/h2o_utils.py @@ -0,0 +1,120 @@ +import os +from os import PathLike +from typing import Dict, Any, Union, Optional, Tuple + +from pandas import DataFrame, Series, concat +from numpy import arange + +from h2o.frame import H2OFrame +from h2o.backend import H2OLocalServer +from h2o.estimators import H2OEstimator +from h2o import no_progress, cluster, remove_all, connect, load_model, save_model + + +def h2o_start(h2o_server_params: Dict[str, Any] = None) -> None: + # nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None, + # ice_root=None, log_dir=None, log_level=None, max_log_file_size=None, port="54321+", name=None, + # extra_classpath=None, verbose=True, jvm_custom_args=None, bind_to_localhost=True + h2o_server_params = {'verbose': False} if h2o_server_params is None else h2o_server_params + no_progress() + if (cluster() is None) or (not 
cluster().is_running()): + h2oserver = H2OLocalServer.start(**h2o_server_params) + connect(server=h2oserver, verbose=False) + + +def h2o_stop() -> None: + if (cluster() is not None) or (cluster().is_running()): + remove_all() + cluster().shutdown() + + +def to_h2oframe(df: DataFrame) -> H2OFrame: + """Function converts pandas.DataFrame to h2o.H2OFrame ensuring there is no bug duplicating rows in results. + + Args: + df (pandas.DataFrame): Dataset to convert to h2o.H2OFrame + + Returns: + DataFrame converted to h2o.H2OFrame. + """ + + # https://stackoverflow.com/questions/45672118/h2oframe-in-python-is-adding-additional-duplicate-rows-to-the-pandas-dataframe + df_h2o = df.copy().reset_index(drop=True) + h2of = H2OFrame(df_h2o) + + if h2of.shape[0] != df_h2o.shape[0]: + df_h2o['__insolver_temp_row_id'] = arange(len(df_h2o)) + h2of = H2OFrame(df_h2o) + h2of = h2of.drop_duplicates(columns=['__insolver_temp_row_id'], keep='first') + h2of = h2of.drop('__insolver_temp_row_id', axis=1) + return h2of + + +def x_y_to_h2o_frame( + x: Union[DataFrame, Series], + y: Union[DataFrame, Series], + sample_weight: Union[DataFrame, Series], + params: Dict, + x_valid: Union[DataFrame, Series], + y_valid: Union[DataFrame, Series], + sample_weight_valid: Union[DataFrame, Series], +) -> Tuple[H2OFrame, Dict]: + if (sample_weight is not None) and isinstance(sample_weight, (DataFrame, Series)): + params['offset_column'] = ( + list(sample_weight.columns) if isinstance(sample_weight, DataFrame) else sample_weight.name + ) + x = concat([x, sample_weight], axis=1) + train_set = to_h2oframe(concat([x, y], axis=1)) + + if (x_valid is not None) and (y_valid is not None): + if all([sam_weight is not None for sam_weight in [sample_weight_valid, sample_weight]]) and isinstance( + sample_weight_valid, (DataFrame, Series) + ): + x_valid = concat([x_valid, sample_weight_valid], axis=1) + valid_set = to_h2oframe(concat([x_valid, y_valid], axis=1)) + params['validation_frame'] = valid_set + return 
train_set, params + + +def save_h2o( + model: H2OEstimator, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any +) -> Optional[bytes]: + if not ((path_or_buf is None) or (isinstance(path_or_buf, str))): + raise ValueError(f"Invalid file path or buffer object {type(path_or_buf)}") + + _model_cached = None if '_model_cached' not in kwargs else kwargs.pop('_model_cached') + + if path_or_buf is None: + # Since there no possibility to save h2o model to a variable, workaround is needed + if _model_cached is None: + save_model(model=model, filename='.temp_h2o_model_save', **kwargs) + with open('.temp_h2o_model_save', 'rb') as file: + saved = file.read() + os.remove('.temp_h2o_model_save') + else: + saved = _model_cached + return saved + else: + path, filename = os.path.split(path_or_buf) + # force = False, export_cross_validation_predictions = False + save_model(model=model, path=path, filename=filename, **kwargs) + return None + + +def load_h2o( + path_or_buf: Union[str, PathLike[str], bytes], + h2o_server_params: Optional[Dict[str, Any]] = None, + terminate: bool = True, +) -> H2OEstimator: + h2o_start(h2o_server_params) + if isinstance(path_or_buf, (str, PathLike)): + model = load_model(path_or_buf) + else: + # Since there no possibility to load h2o model from a variable, workaround is needed + with open('.temp_h2o_model_load', 'wb') as file: + file.write(path_or_buf) + model = load_model('.temp_h2o_model_load') + os.remove('.temp_h2o_model_load') + if terminate: + h2o_stop() + return model diff --git a/insolver/wrappers_v2/utils/save_load_utils.py b/insolver/wrappers_v2/utils/save_load_utils.py new file mode 100644 index 0000000..cd3e996 --- /dev/null +++ b/insolver/wrappers_v2/utils/save_load_utils.py @@ -0,0 +1,89 @@ +import os +import json +import pickle +import dill +from os import PathLike +from typing import Union, Any, Optional, IO, Callable, Dict +from zipfile import ZipFile, ZIP_DEFLATED, BadZipFile + +from .h2o_utils import load_h2o + + +def 
load(path_or_buf: Union[str, PathLike[str], bytes], saving_method: str, **kwargs: Any) -> Callable: + load_config: Dict[str, Callable] = dict(pickle=load_pickle, dill=load_dill, h2o=load_h2o) + return load_config[saving_method](path_or_buf, **kwargs) + + +def load_model(path_or_buf: Union[str, PathLike[str], IO[bytes]], **kwargs: Any) -> Any: + from insolver.wrappers_v2 import InsolverGLMWrapper + + wrapper_config = dict(glm=InsolverGLMWrapper) + + if isinstance(path_or_buf, str): + path_or_buf = os.path.abspath(path_or_buf) + + try: + with ZipFile(file=path_or_buf, mode="r", compression=ZIP_DEFLATED) as zip_file: + filenames = zip_file.namelist() + if (len(zip_file.filelist) == 2) and ("metadata.json" in filenames): + metadata = json.loads(zip_file.read("metadata.json")) + filenames.remove("metadata.json") + model = zip_file.read(filenames[0]) + else: + raise RuntimeError( + "File has inappropriate format. Currently `load_model` can load only models saved " + "with `mode='insolver'` option." + ) + + init_params = metadata["init_params"] + init_params.update(init_params.pop("kwargs")) + wrapper_ = wrapper_config[metadata["algo"]](**init_params) + wrapper_.metadata.update(metadata) + wrapper_.model = load(model, metadata["saving_method"], **kwargs) + wrapper_.metadata.pop("saving_method") + return wrapper_ + except BadZipFile: + raise RuntimeError( + "File has inappropriate format. Currently `load_model` can load only models saved " + "with `mode='insolver'` option." 
+ ) + + +def save_pickle(model: Any, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any) -> Optional[bytes]: + if not ((path_or_buf is None) or (isinstance(path_or_buf, str))): + raise ValueError(f"Invalid file path or buffer object {type(path_or_buf)}") + + if path_or_buf is None: + return pickle.dumps(model, **kwargs) + else: + with open(path_or_buf, "wb") as _file: + pickle.dump(model, _file, **kwargs) + return None + + +def load_pickle(path_or_buf: Union[str, PathLike[str], bytes], **kwargs: Any) -> Any: + if isinstance(path_or_buf, (str, PathLike)): + with open(path_or_buf, 'rb') as _file: + return pickle.load(_file, **kwargs) + else: + return pickle.loads(path_or_buf, **kwargs) + + +def save_dill(model: Any, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any) -> Optional[bytes]: + if not ((path_or_buf is None) or (isinstance(path_or_buf, str))): + raise ValueError(f"Invalid file path or buffer object {type(path_or_buf)}") + + if path_or_buf is None: + return dill.dumps(model, **kwargs) + else: + with open(path_or_buf, "wb") as _file: + dill.dump(model, _file, **kwargs) + return None + + +def load_dill(path_or_buf: Union[str, PathLike[str], bytes], **kwargs: Any) -> Any: + if isinstance(path_or_buf, (str, PathLike)): + with open(path_or_buf, 'rb') as _file: + return dill.load(_file, **kwargs) + else: + return dill.loads(path_or_buf, **kwargs) diff --git a/requirements.txt b/requirements.txt index 53c2351..05f3f08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,15 @@ # Python 3.7 specific -typing_extensions>=4.0.1 +typing_extensions>=4.0.1; python_version<"3.8" # Restrictions due to Python 3.7 -numpy~=1.21.0,<1.22.0 -pandas>=1.2.0,<1.4.0 -scipy>=1.6.0,<1.8.0 -scikit-learn>=1.0,<1.1.0 +numpy~=1.21.0,<1.22.0; python_version=="3.7" +pandas>=1.2.0,<1.4.0; python_version=="3.7" +scipy>=1.6.0,<1.8.0; python_version=="3.7" +scikit-learn>=1.0,<1.1.0; python_version=="3.7" +numpy>=1.21.0; python_version>"3.7"
+pandas>=1.2.0; python_version>"3.7" +scipy>=1.6.0; python_version>"3.7" +scikit-learn>=1.0; python_version>"3.7" # Python 3.7 not affecting xgboost>=1.5.0 # Python 3.7 deprecates after major release after 1.6.1 @@ -16,7 +20,7 @@ matplotlib>=3.4.3 plotly>=5.3.1 seaborn==0.11.2 shap>=0.39.0 -h2o>=3.32.0.3 +h2o>=3.36.0 statsmodels==0.13.1 lime>=0.2.0.1 dill>=0.3.4 @@ -31,7 +35,8 @@ fastapi>=0.65.2 uvicorn[standard]>=0.13.3 pydantic==1.8.2 gunicorn>=20.0.4 -django==3.2.* # Restrictions due to Python 3.7 +django==3.2.*; python_version=="3.7" # Restrictions due to Python 3.7 +django>=3.2; python_version>"3.7" djangorestframework==3.13.1 psycopg2-binary==2.9.3 -sympy==1.9 \ No newline at end of file +sympy==1.9 diff --git a/tests/test_wrappers_v2_InsolverBaseWrapper.py b/tests/test_wrappers_v2_InsolverBaseWrapper.py new file mode 100644 index 0000000..cd4ef62 --- /dev/null +++ b/tests/test_wrappers_v2_InsolverBaseWrapper.py @@ -0,0 +1,51 @@ +import pytest + +from insolver.wrappers_v2 import InsolverBaseWrapper +from insolver.wrappers_v2.utils import save_pickle, save_dill + + +class DescendantInsolverBaseWrapper(InsolverBaseWrapper): + algo = 'dummy' + _backend_saving_methods = {'some_backend': {'pickle': save_pickle, 'dill': save_dill}} + + def __init__(self, backend, task): + self._get_init_args(vars()) + self.backend = backend + self.task = task + + +def test_InsolverBaseWrapper(): + descendant = DescendantInsolverBaseWrapper(backend='some_backend', task='nothing') + assert descendant.metadata == {'init_params': {'backend': 'some_backend', 'task': 'nothing'}, 'is_fitted': False} + assert descendant.algo == 'dummy' + assert descendant.model is None + assert descendant() is None + assert descendant._backend_saving_methods == {'some_backend': {'pickle': save_pickle, 'dill': save_dill}} + descendant._update_metadata() + assert descendant.metadata == { + 'init_params': {'backend': 'some_backend', 'task': 'nothing'}, + 'backend': 'some_backend', + 'task': 'nothing', +
'is_fitted': False, + } + + +def test_InsolverBaseWrapper_save_model(): + descendant = DescendantInsolverBaseWrapper(backend='some_backend', task='nothing') + descendant._update_metadata() + + with pytest.raises(ValueError, match="No fitted model found. Fit model first."): + descendant.save_model() + descendant.model = {'dummy_object': "model"} + + with pytest.raises(ValueError, match=r'Invalid method ".*". Supported values for .* backend are .*'): + descendant.save_model(method='some_new_method') + + # model_to_str = descendant.save_model() + # assert isinstance(model_to_str, bytes) + + # model_to_str = descendant.save_model(method='pickle') + # assert isinstance(model_to_str, bytes) + + # model_to_str = descendant.save_model(method='dill') + # assert isinstance(model_to_str, bytes) From 7a3b41cbe13f2634655ffa6444a9189b7d57b35d Mon Sep 17 00:00:00 2001 From: alexmindset Date: Thu, 4 Aug 2022 00:54:05 +0300 Subject: [PATCH 3/5] Maintenance: Fixing test for InsolverBaseWrapper --- tests/test_wrappers_v2_InsolverBaseWrapper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_wrappers_v2_InsolverBaseWrapper.py b/tests/test_wrappers_v2_InsolverBaseWrapper.py index cd4ef62..4c96243 100644 --- a/tests/test_wrappers_v2_InsolverBaseWrapper.py +++ b/tests/test_wrappers_v2_InsolverBaseWrapper.py @@ -16,7 +16,11 @@ def __init__(self, backend, task): def test_InsolverBaseWrapper(): descendant = DescendantInsolverBaseWrapper(backend='some_backend', task='nothing') - assert descendant.metadata == {'init_params': {'backend': 'some_backend', 'task': 'nothing'}, 'is_fitted': False} + assert descendant.metadata == { + 'init_params': {'backend': 'some_backend', 'task': 'nothing'}, + 'is_fitted': False, + 'algo': 'dummy', + } assert descendant.algo == 'dummy' assert descendant.model is None assert descendant() is None @@ -26,6 +30,7 @@ def test_InsolverBaseWrapper(): 'init_params': {'backend': 'some_backend', 'task': 'nothing'}, 'backend': 
'some_backend', 'task': 'nothing', + 'algo': 'dummy', 'is_fitted': False, } From c6b9bd5d90b99a5de74d89cb739bd41bee589bb1 Mon Sep 17 00:00:00 2001 From: alexmindset Date: Thu, 4 Aug 2022 01:13:09 +0300 Subject: [PATCH 4/5] Fix annotations --- insolver/wrappers_v2/base.py | 4 ++-- insolver/wrappers_v2/glm.py | 2 +- insolver/wrappers_v2/utils/h2o_utils.py | 4 ++-- insolver/wrappers_v2/utils/save_load_utils.py | 12 ++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/insolver/wrappers_v2/base.py b/insolver/wrappers_v2/base.py index f1f7159..389513b 100644 --- a/insolver/wrappers_v2/base.py +++ b/insolver/wrappers_v2/base.py @@ -49,7 +49,7 @@ def _update_metadata(self) -> None: } self.metadata.update(_metadata) - def _save_insolver(self, path_or_buf: Union[str, PathLike[str]], method: Callable, **kwargs: Any) -> None: + def _save_insolver(self, path_or_buf: Union[str, 'PathLike[str]'], method: Callable, **kwargs: Any) -> None: buffer = BytesIO() with ZipFile(buffer, mode="w", compression=ZIP_DEFLATED) as zip_file: zip_file.writestr("metadata.json", json.dumps(self.metadata)) @@ -63,7 +63,7 @@ def _save_insolver(self, path_or_buf: Union[str, PathLike[str]], method: Callabl def save_model( self, - path_or_buf: Union[None, str, PathLike[str]] = None, + path_or_buf: Union[None, str, 'PathLike[str]'] = None, mode: Literal['insolver', 'raw'] = "insolver", method: str = '', **kwargs: Any, diff --git a/insolver/wrappers_v2/glm.py b/insolver/wrappers_v2/glm.py index e5a8786..a96b664 100644 --- a/insolver/wrappers_v2/glm.py +++ b/insolver/wrappers_v2/glm.py @@ -414,7 +414,7 @@ def coef(self) -> Optional[Dict[str, float]]: coefs = self.model.coef() return coefs - def coef_to_csv(self, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any) -> None: + def coef_to_csv(self, path_or_buf: Union[None, str, 'PathLike[str]'] = None, **kwargs: Any) -> None: """Write GLM coefficients to a comma-separated values (csv) file. 
Args: diff --git a/insolver/wrappers_v2/utils/h2o_utils.py b/insolver/wrappers_v2/utils/h2o_utils.py index da9f2b1..96426a8 100644 --- a/insolver/wrappers_v2/utils/h2o_utils.py +++ b/insolver/wrappers_v2/utils/h2o_utils.py @@ -77,7 +77,7 @@ def x_y_to_h2o_frame( def save_h2o( - model: H2OEstimator, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any + model: H2OEstimator, path_or_buf: Union[None, str, 'PathLike[str]'] = None, **kwargs: Any ) -> Optional[bytes]: if not ((path_or_buf is None) or (isinstance(path_or_buf, str))): raise ValueError(f"Invalid file path or buffer object {type(path_or_buf)}") @@ -102,7 +102,7 @@ def save_h2o( def load_h2o( - path_or_buf: Union[str, PathLike[str], bytes], + path_or_buf: Union[str, 'PathLike[str]', bytes], h2o_server_params: Optional[Dict[str, Any]] = None, terminate: bool = True, ) -> H2OEstimator: diff --git a/insolver/wrappers_v2/utils/save_load_utils.py b/insolver/wrappers_v2/utils/save_load_utils.py index cd3e996..4813505 100644 --- a/insolver/wrappers_v2/utils/save_load_utils.py +++ b/insolver/wrappers_v2/utils/save_load_utils.py @@ -9,12 +9,12 @@ from .h2o_utils import load_h2o -def load(path_or_buf: Union[str, PathLike[str], bytes], saving_method: str, **kwargs: Any) -> Callable: +def load(path_or_buf: Union[str, 'PathLike[str]', bytes], saving_method: str, **kwargs: Any) -> Callable: load_config: Dict[str, Callable] = dict(pickle=load_pickle, dill=load_dill, h2o=load_h2o) return load_config[saving_method](path_or_buf, **kwargs) -def load_model(path_or_buf: Union[str, PathLike[str], IO[bytes]], **kwargs: Any) -> Any: +def load_model(path_or_buf: Union[str, 'PathLike[str]', IO[bytes]], **kwargs: Any) -> Any: from insolver.wrappers_v2 import InsolverGLMWrapper wrapper_config = dict(glm=InsolverGLMWrapper) @@ -49,7 +49,7 @@ def load_model(path_or_buf: Union[str, PathLike[str], IO[bytes]], **kwargs: Any) ) -def save_pickle(model: Any, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any) -> 
Optional[bytes]: +def save_pickle(model: Any, path_or_buf: Union[None, str, 'PathLike[str]'] = None, **kwargs: Any) -> Optional[bytes]: if not ((path_or_buf is None) or (isinstance(path_or_buf, str))): raise ValueError(f"Invalid file path or buffer object {type(path_or_buf)}") @@ -61,7 +61,7 @@ def save_pickle(model: Any, path_or_buf: Union[None, str, PathLike[str]] = None, return None -def load_pickle(path_or_buf: Union[str, PathLike[str], bytes], **kwargs: Any) -> Any: +def load_pickle(path_or_buf: Union[str, 'PathLike[str]', bytes], **kwargs: Any) -> Any: if isinstance(path_or_buf, (str, PathLike)): with open(path_or_buf, 'rb') as _file: return pickle.load(_file, **kwargs) @@ -69,7 +69,7 @@ def load_pickle(path_or_buf: Union[str, PathLike[str], bytes], **kwargs: Any) -> return pickle.loads(path_or_buf, **kwargs) -def save_dill(model: Any, path_or_buf: Union[None, str, PathLike[str]] = None, **kwargs: Any) -> Optional[bytes]: +def save_dill(model: Any, path_or_buf: Union[None, str, 'PathLike[str]'] = None, **kwargs: Any) -> Optional[bytes]: if not ((path_or_buf is None) or (isinstance(path_or_buf, str))): raise ValueError(f"Invalid file path or buffer object {type(path_or_buf)}") @@ -81,7 +81,7 @@ def save_dill(model: Any, path_or_buf: Union[None, str, PathLike[str]] = None, * return None -def load_dill(path_or_buf: Union[str, PathLike[str], bytes], **kwargs: Any) -> Any: +def load_dill(path_or_buf: Union[str, 'PathLike[str]', bytes], **kwargs: Any) -> Any: if isinstance(path_or_buf, (str, PathLike)): with open(path_or_buf, 'rb') as _file: return dill.load(_file, **kwargs) From 7e6a2940f37ec4c927a7de3573fe6a0240df39ea Mon Sep 17 00:00:00 2001 From: alexmindset Date: Thu, 4 Aug 2022 01:27:21 +0300 Subject: [PATCH 5/5] Maintenance: code coverage only on master branch. 
--- .github/workflows/insolver-tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/insolver-tests.yaml b/.github/workflows/insolver-tests.yaml index e6447ff..3392796 100644 --- a/.github/workflows/insolver-tests.yaml +++ b/.github/workflows/insolver-tests.yaml @@ -49,9 +49,9 @@ jobs: shell: bash run: | python -m pytest --cov=./ --cov-report=xml - - name: Code coverage with codecov on ubuntu-latest and Python 3.8 + - name: Code coverage with codecov on (ubuntu-latest & Python 3.8 & master) if: | - (matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8') + (matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8' && github.ref == 'refs/heads/master') uses: codecov/codecov-action@v3 with: flags: unittests