Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Drop Python v3.8, update dependencies, and adapt unit tests to API changes #188

Merged
merged 1 commit into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ omit =

[report]
show_missing = True
fail_under = 90
fail_under = 95

[html]
directory = htmlcov
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
python-version: ["3.8"]
python-version: ["3.9"]
steps:
#----------------------------------------------
# ----- check-out repo and set-up python -----
Expand Down
17 changes: 4 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ jobs:
#----------------------------------------------
runs-on: ${{ matrix.os }}
strategy:
# TODO(amir): currently this is `false` so we can use the `poetry-cache` of `python v3.8`
# once the stupid `glmnet` dependency is resolved, change `fail-fast = True`
fail-fast: false
fail-fast: true
matrix:
# TODO(amir): enable `windows-latest`, `macos-latest` and fix possible `poetry` issues and glmnet
# TODO(amir): add `"3.12"` once the glmnet wheel is released
os: ["ubuntu-latest"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11"]
steps:
#----------------------------------------------
# ----- check-out repo and set-up python -----
Expand Down Expand Up @@ -55,15 +54,7 @@ jobs:
uses: actions/cache@v3
with:
path: .venv
# TODO(amir): here, we explicitly set the key independent of what `python-version` we are running
# the main issue is with `glmnet` that does not currently support `python v3.9 and 3.10`
# therefore, all the CI jobs for those python versions fail at first; when we re-run the
# jobs, the cached venv built with `python v3.8` is retrieved and the jobs run successfully
# ideally, we should be able to add `python-versions` here to distinguish between caches
# key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
# NOTE: `glmnet` has not been updated since 2020; trying to build it on-the-fly
# https://github.com/civisanalytics/python-glmnet/issues/79
key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
#----------------------------------------------
# ----- install dependencies -----
#----------------------------------------------
Expand Down
893 changes: 466 additions & 427 deletions examples/quick-starts/metrics/BinaryClassificationMetrics.ipynb

Large diffs are not rendered by default.

4,057 changes: 1,995 additions & 2,062 deletions poetry.lock

Large diffs are not rendered by default.

107 changes: 53 additions & 54 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ classifiers = [
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Natural Language :: English",
Expand All @@ -52,98 +52,97 @@ exclude = [


[tool.poetry.dependencies]
python = ">=3.8,<3.12"
python = ">=3.9,<3.13"

# --- scientific-computing ---
# TODO(amir): as of numpy "^1.23", distutils (which glmnet needs) is deprecated;
# wait for a new `glmnet` release, then upgrade this
numpy = "^1.22,<1.23"
pandas = "^1.5"
scipy = "^1.9"
statsmodels = "^0.13"
numpy = ">=1.22,<2.0"
pandas = ">=1.4,<2.0"
scipy = ">=1.9"
statsmodels = ">=0.13"

# --- machine-learning ---
scikit-learn = "^1.1"
xgboost = "^1.7"
glmnet = "^2.2"
shap = "^0.41"
scikit-learn = ">=1.1"
xgboost = ">=1.7,<2.0"
python-glmnet = ">=2.2"
shap = ">=0.46"

# --- optimization ----
bayesian-optimization = "^1.2"
hyperopt = "^0.2"
bayesian-optimization = ">=1.2"
hyperopt = ">=0.2"

# --- visualization ---
# TODO(amir): matplotlib v>=3.6 comes with breaking changes
# make sure to apply the upgrade and fix the issues once the API refactor is done
matplotlib = "^3.5,<3.6"
seaborn = "^0.12"
matplotlib = ">=3.5"
seaborn = ">=0.12"

# --- command-lines ---
click = "^8.1"
click = ">=8.1"


[tool.poetry.group.dev.dependencies]

# --- package-management ---
pip = "^22.3"
pip = ">=22.3"
setuptools = ">=70.3"

# --- task-management ---
poethepoet = "^0.16"
poethepoet = ">=0.16"

# --- testenv-management ---
tox = "^3.28"
tox = ">=3.28"

# --- formatting ---
add-trailing-comma = "^2.4"
isort = "^5.11"
black = "^22.12"
jupyter-black = "^0.3"
add-trailing-comma = ">=2.4"
isort = ">=5.11"
black = ">=22.12"
jupyter-black = ">=0.3"

# --- linting ---
flake8 = "^5.0"
flake8-commas = "^2.1"
flake8-annotations = "^2.9"
flake8-comprehensions = "^3.10"
flake8-eradicate = "^1.4"
flake8-simplify = "^0.19"
flake8-tidy-imports = "^4.8"
flake8-type-checking = "^2.3"
flake8-typing-imports = "^1.12"
flake8-use-fstring = "^1.4"
pep8-naming = "^0.13"
flake8 = ">=5.0"
flake8-commas = ">=4.0"
flake8-annotations = ">=2.9"
flake8-comprehensions = ">=3.10"
flake8-eradicate = ">=1.4"
flake8-simplify = ">=0.19"
flake8-tidy-imports = ">=4.8"
flake8-type-checking = ">=2.3"
flake8-typing-imports = ">=1.12"
flake8-use-fstring = ">=1.4"
pep8-naming = ">=0.13"

# --- type-checking ---
mypy = "^0.991"
pandas-stubs = "^1.5"
data-science-types = "^0.2"
mypy = ">=0.991"
pandas-stubs = ">=1.5"
data-science-types = ">=0.2"

# --- unit-testing ---
pytest = "^7.2"
pytest-cov = "^4.0"
pytest-mock = "^3.10"
mock = "^4.0"
coverage = "^6.5"
assertpy = "^1.1"
pytest = ">=7.2"
pytest-cov = ">=4.0"
pytest-mock = ">=3.10"
mock = ">=4.0"
coverage = ">=6.5"
assertpy = ">=1.1"

# --- docs ----
sphinx = "^5.3"
sphinx-autoapi = "^1.9"
sphinx_design = "^0.3"
myst-parser = "^0.18"
furo = "^2022.9"
sphinx = ">=5.3"
sphinx-autoapi = ">=1.9"
sphinx_design = ">=0.3"
myst-parser = ">=0.18"
furo = ">=2022.9"

# --- jupyter ---
ipykernel = "^6.20"
jupytext = "^1.14"
ipykernel = ">=6.29"
jupytext = ">=1.14"

# --- monitoring ---
watchdog = "^2.1"
watchdog = ">=2.1"

# --- image manipulation ---
pillow = "^9.3.0"
pillow = ">=9.3"

[build-system]
requires = ["poetry-core>=1.4.0"]
requires = ["poetry-core>=1.9.0"]
build-backend = "poetry.core.masonry.api"


Expand Down
14 changes: 8 additions & 6 deletions src/slickml/metrics/_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,14 @@ def _metrics_dict(self) -> Dict[str, Optional[float]]:
number=self.mse_,
ndigits=self.precision_digits,
),
"Mean Squared Log Error": round(
number=self.msle_,
ndigits=self.precision_digits,
)
if self.msle_
else None,
"Mean Squared Log Error": (
round(
number=self.msle_,
ndigits=self.precision_digits,
)
if self.msle_
else None
),
"Mean Absolute Percentage Error": round(
number=self.mape_,
ndigits=self.precision_digits,
Expand Down
29 changes: 20 additions & 9 deletions src/slickml/optimization/_bayesianopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import xgboost as xgb
from bayes_opt import BayesianOptimization
from bayes_opt.util import UtilityFunction

from slickml.base import BaseXGBoostEstimator
from slickml.utils import check_var
Expand Down Expand Up @@ -366,19 +367,23 @@ def _xgb_eval(
self.optimizer_.maximize(
init_points=self.n_init_iter,
n_iter=self.n_iter,
acq=self.acquisition_criterion,
kappa=2.576,
kappa_decay=1,
kappa_decay_delay=0,
xi=0.0,
acquisition_function=UtilityFunction(
kind=self.acquisition_criterion,
kappa=2.576,
xi=0.0,
kappa_decay=1,
kappa_decay_delay=0,
),
)
self.results_ = self.get_results()
self.best_params_ = self.get_best_params()
self.best_results_ = self.get_best_results()

return None

def get_params_bounds(self) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
def get_params_bounds(
self,
) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
"""Returns the hyper-parameters boundaries for the tuning process.

Returns
Expand Down Expand Up @@ -447,7 +452,9 @@ def get_best_results(self) -> pd.DataFrame:
cond = self.results_[self.metrics] == self.results_[self.metrics].max()
return self.results_.loc[cond, :].reset_index(drop=True)

def _default_params_bounds(self) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
def _default_params_bounds(
self,
) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
"""Default set of parameters when the class is being instantiated with ``params_bounds=None``.

Notes
Expand Down Expand Up @@ -575,9 +582,13 @@ def _metrics_and_objectives_should_be_aligned(self) -> None:
None
"""
if self.metrics in self._clf_metrics() and self.objective not in self._clf_objectives():
raise ValueError("Classification metrics cannot be used with regression objectives.")
raise ValueError(
"Classification metrics cannot be used with regression objectives.",
)

if self.metrics not in self._clf_metrics() and self.objective in self._clf_objectives():
raise ValueError("Regression metrics cannot be used with classification objectives.")
raise ValueError(
"Regression metrics cannot be used with classification objectives.",
)

return None
18 changes: 9 additions & 9 deletions src/slickml/visualization/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ def plot_binary_classification_metrics(

# TODO(amir): move this to a function ?
# prepare thresholds for plotting
thr_set1 = np.arange(
min(kwargs["roc_thresholds"]),
max(kwargs["roc_thresholds"]),
0.01,
thr_set1 = np.linspace(
start=min(kwargs["roc_thresholds"]),
stop=max(kwargs["roc_thresholds"]),
num=1000,
)
thr_set2 = np.arange(
min(kwargs["pr_thresholds"]),
max(kwargs["pr_thresholds"]),
0.01,
thr_set2 = np.linspace(
start=min(kwargs["pr_thresholds"]),
stop=max(kwargs["pr_thresholds"]),
num=1000,
)
f1_list = [
2
Expand Down Expand Up @@ -552,7 +552,7 @@ def plot_regression_metrics(
freqs, _, _ = ax5.hist(
kwargs["y_ratio"],
histtype="bar",
bins=np.arange(0.75, 1.25, 0.01),
bins=np.arange(0.75, 1.25, 0.05),
alpha=1.0,
color="#B3C3F3",
edgecolor="navy",
Expand Down
14 changes: 8 additions & 6 deletions tests/slickml/classification/test_glmnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def test_glmnetcvclassifier__passes__with_defaults_and_no_test_targets(
assert_that(coeff_path_fig).is_instance_of(Figure)
assert_that(shap_waterfall_test_fig).is_instance_of(Figure)
assert_that(shap_waterfall_train_fig).is_instance_of(Figure)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), 0.00529, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), 0.01112, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), -0.01119, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), -0.00536, decimal=5)

@pytest.mark.parametrize(
("clf_train_test_x_y"),
Expand Down Expand Up @@ -257,8 +257,8 @@ def test_glmnetcvclassifier__passes__with_defaults(
assert_that(coeff_path_fig).is_instance_of(Figure)
assert_that(shap_waterfall_test_fig).is_instance_of(Figure)
assert_that(shap_waterfall_train_fig).is_instance_of(Figure)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), 0.00529, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), 0.01112, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), -0.01119, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), -0.00536, decimal=5)

# TODO(amir): add a test for `lambda_path` parameter
@pytest.mark.parametrize(
Expand Down Expand Up @@ -499,13 +499,15 @@ def test_glmnetcvclassifier_plots__passes__with_valid_save_paths(
path=coeff_path_fig_path,
expected_size=(1627, 930),
)
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to("shap_waterfall_fig.png")
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to(
"shap_waterfall_fig.png",
)
_validate_figure_type_and_size(
path=shap_waterfall_fig_path,
expected_size=(1375, 974),
)
assert_that(shap_summary_fig_path.parts[-1]).is_equal_to("shap_summary_fig.png")
_validate_figure_type_and_size(
path=shap_summary_fig_path,
expected_size=(1474, 760),
expected_size=(1472, 757),
)
10 changes: 7 additions & 3 deletions tests/slickml/classification/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,18 +465,22 @@ def test_xgboostclassifier_plots__passes__with_valid_save_paths(
display_plot=False,
)

assert_that(feature_importance_fig_path.parts[-1]).is_equal_to("feature_importance_fig.png")
assert_that(feature_importance_fig_path.parts[-1]).is_equal_to(
"feature_importance_fig.png",
)
_validate_figure_type_and_size(
path=feature_importance_fig_path,
expected_size=(1395, 943),
)
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to("shap_waterfall_fig.png")
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to(
"shap_waterfall_fig.png",
)
_validate_figure_type_and_size(
path=shap_waterfall_fig_path,
expected_size=(1391, 974),
)
assert_that(shap_summary_fig_path.parts[-1]).is_equal_to("shap_summary_fig.png")
_validate_figure_type_and_size(
path=shap_summary_fig_path,
expected_size=(1474, 760),
expected_size=(1472, 757),
)
Loading
Loading