Skip to content

Commit

Permalink
Update deps, remove python v3.8, and adapt unit-test based on API changes
Browse files Browse the repository at this point in the history
  • Loading branch information
amirhessam88 committed Jul 17, 2024
1 parent 8b0d7d7 commit 780fb96
Show file tree
Hide file tree
Showing 18 changed files with 2,626 additions and 2,614 deletions.
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ omit =

[report]
show_missing = True
fail_under = 90
fail_under = 95

[html]
directory = htmlcov
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
python-version: ["3.8"]
python-version: ["3.9"]
steps:
#----------------------------------------------
# ----- check-out repo and set-up python -----
Expand Down
17 changes: 4 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ jobs:
#----------------------------------------------
runs-on: ${{ matrix.os }}
strategy:
# TODO(amir): currently this is `false` so we can use the `poetry-cache` of `python v3.8`
# once the stupid `glmnet` dependency is resolved, change `fail-fast = True`
fail-fast: false
fail-fast: true
matrix:
# TODO(amir): enable `windows-latest` and `macos-latest`, and fix any resulting `poetry` and `glmnet` issues
# TODO(amir): add `"3.12"` once the glmnet wheel is released
os: ["ubuntu-latest"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11"]
steps:
#----------------------------------------------
# ----- check-out repo and set-up python -----
Expand Down Expand Up @@ -55,15 +54,7 @@ jobs:
uses: actions/cache@v3
with:
path: .venv
# TODO(amir): here, we explicitly set the key independent of what `python-version` we are running
# the main issue is with `glmnet` that does not currently support `python v3.9 and 3.10`
# therefore, all the CI jobs for those python versions failed at first, then we re-run the
# jobs, the cached venv using `python v3.8` will be retrieved and the jobs will run successfully
# ideally, we should be able to add `python-versions` here to distinguish between caches
# key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
# NOTE: `glmnet` has not been updated since 2020; trying to build it on-the-fly
# https://github.com/civisanalytics/python-glmnet/issues/79
key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
#----------------------------------------------
# ----- install dependencies -----
#----------------------------------------------
Expand Down
893 changes: 466 additions & 427 deletions examples/quick-starts/metrics/BinaryClassificationMetrics.ipynb

Large diffs are not rendered by default.

4,057 changes: 1,995 additions & 2,062 deletions poetry.lock

Large diffs are not rendered by default.

105 changes: 52 additions & 53 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,98 +52,97 @@ exclude = [


[tool.poetry.dependencies]
python = ">=3.8,<3.12"
python = ">=3.9,<3.13"

# --- scientific-computing ---
# TODO(amir): as of numpy "^1.23", `distutils` (which `glmnet` needs) is deprecated;
# wait for a new `glmnet` release, then upgrade this
numpy = "^1.22,<1.23"
pandas = "^1.5"
scipy = "^1.9"
statsmodels = "^0.13"
numpy = ">=1.22,<2.0"
pandas = ">=1.4,<2.0"
scipy = ">=1.9"
statsmodels = ">=0.13"

# --- machine-learning ---
scikit-learn = "^1.1"
xgboost = "^1.7"
glmnet = "^2.2"
shap = "^0.41"
scikit-learn = ">=1.1"
xgboost = ">=1.7,<2.0"
python-glmnet = ">=2.2"
shap = ">=0.46"

# --- optimization ----
bayesian-optimization = "^1.2"
hyperopt = "^0.2"
bayesian-optimization = ">=1.2"
hyperopt = ">=0.2"

# --- visualization ---
# TODO(amir): matplotlib v>=3.6 comes with breaking changes
# make sure to apply the upgrade and fix the issues once the API refactor is done
matplotlib = "^3.5,<3.6"
seaborn = "^0.12"
matplotlib = ">=3.5"
seaborn = ">=0.12"

# --- command-lines ---
click = "^8.1"
click = ">=8.1"


[tool.poetry.group.dev.dependencies]

# --- package-management ---
pip = "^22.3"
pip = ">=22.3"
setuptools = ">=70.3"

# --- task-management ---
poethepoet = "^0.16"
poethepoet = ">=0.16"

# --- testenv-management ---
tox = "^3.28"
tox = ">=3.28"

# --- formatting ---
add-trailing-comma = "^2.4"
isort = "^5.11"
black = "^22.12"
jupyter-black = "^0.3"
add-trailing-comma = ">=2.4"
isort = ">=5.11"
black = ">=22.12"
jupyter-black = ">=0.3"

# --- linting ---
flake8 = "^5.0"
flake8-commas = "^2.1"
flake8-annotations = "^2.9"
flake8-comprehensions = "^3.10"
flake8-eradicate = "^1.4"
flake8-simplify = "^0.19"
flake8-tidy-imports = "^4.8"
flake8-type-checking = "^2.3"
flake8-typing-imports = "^1.12"
flake8-use-fstring = "^1.4"
pep8-naming = "^0.13"
flake8 = ">=5.0"
flake8-commas = ">=4.0"
flake8-annotations = ">=2.9"
flake8-comprehensions = ">=3.10"
flake8-eradicate = ">=1.4"
flake8-simplify = ">=0.19"
flake8-tidy-imports = ">=4.8"
flake8-type-checking = ">=2.3"
flake8-typing-imports = ">=1.12"
flake8-use-fstring = ">=1.4"
pep8-naming = ">=0.13"

# --- type-checking ---
mypy = "^0.991"
pandas-stubs = "^1.5"
data-science-types = "^0.2"
mypy = ">=0.991"
pandas-stubs = ">=1.5"
data-science-types = ">=0.2"

# --- unit-testing ---
pytest = "^7.2"
pytest-cov = "^4.0"
pytest-mock = "^3.10"
mock = "^4.0"
coverage = "^6.5"
assertpy = "^1.1"
pytest = ">=7.2"
pytest-cov = ">=4.0"
pytest-mock = ">=3.10"
mock = ">=4.0"
coverage = ">=6.5"
assertpy = ">=1.1"

# --- docs ----
sphinx = "^5.3"
sphinx-autoapi = "^1.9"
sphinx_design = "^0.3"
myst-parser = "^0.18"
furo = "^2022.9"
sphinx = ">=5.3"
sphinx-autoapi = ">=1.9"
sphinx_design = ">=0.3"
myst-parser = ">=0.18"
furo = ">=2022.9"

# --- jupyter ---
ipykernel = "^6.20"
jupytext = "^1.14"
ipykernel = ">=6.29"
jupytext = ">=1.14"

# --- monitoring ---
watchdog = "^2.1"
watchdog = ">=2.1"

# --- image manipulation ---
pillow = "^9.3.0"
pillow = ">=9.3"

[build-system]
requires = ["poetry-core>=1.4.0"]
requires = ["poetry-core>=1.9.0"]
build-backend = "poetry.core.masonry.api"


Expand Down
14 changes: 8 additions & 6 deletions src/slickml/metrics/_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,14 @@ def _metrics_dict(self) -> Dict[str, Optional[float]]:
number=self.mse_,
ndigits=self.precision_digits,
),
"Mean Squared Log Error": round(
number=self.msle_,
ndigits=self.precision_digits,
)
if self.msle_
else None,
"Mean Squared Log Error": (
round(
number=self.msle_,
ndigits=self.precision_digits,
)
if self.msle_
else None
),
"Mean Absolute Percentage Error": round(
number=self.mape_,
ndigits=self.precision_digits,
Expand Down
29 changes: 20 additions & 9 deletions src/slickml/optimization/_bayesianopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import xgboost as xgb
from bayes_opt import BayesianOptimization
from bayes_opt.util import UtilityFunction

from slickml.base import BaseXGBoostEstimator
from slickml.utils import check_var
Expand Down Expand Up @@ -366,19 +367,23 @@ def _xgb_eval(
self.optimizer_.maximize(
init_points=self.n_init_iter,
n_iter=self.n_iter,
acq=self.acquisition_criterion,
kappa=2.576,
kappa_decay=1,
kappa_decay_delay=0,
xi=0.0,
acquisition_function=UtilityFunction(
kind=self.acquisition_criterion,
kappa=2.576,
xi=0.0,
kappa_decay=1,
kappa_decay_delay=0,
),
)
self.results_ = self.get_results()
self.best_params_ = self.get_best_params()
self.best_results_ = self.get_best_results()

return None

def get_params_bounds(self) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
def get_params_bounds(
self,
) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
"""Returns the hyper-parameters boundaries for the tuning process.
Returns
Expand Down Expand Up @@ -447,7 +452,9 @@ def get_best_results(self) -> pd.DataFrame:
cond = self.results_[self.metrics] == self.results_[self.metrics].max()
return self.results_.loc[cond, :].reset_index(drop=True)

def _default_params_bounds(self) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
def _default_params_bounds(
self,
) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
"""Default set of parameters when the class is being instantiated with ``params_bounds=None``.
Notes
Expand Down Expand Up @@ -575,9 +582,13 @@ def _metrics_and_objectives_should_be_aligned(self) -> None:
None
"""
if self.metrics in self._clf_metrics() and self.objective not in self._clf_objectives():
raise ValueError("Classification metrics cannot be used with regression objectives.")
raise ValueError(
"Classification metrics cannot be used with regression objectives.",
)

if self.metrics not in self._clf_metrics() and self.objective in self._clf_objectives():
raise ValueError("Regression metrics cannot be used with classification objectives.")
raise ValueError(
"Regression metrics cannot be used with classification objectives.",
)

return None
18 changes: 9 additions & 9 deletions src/slickml/visualization/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ def plot_binary_classification_metrics(

# TODO(amir): move this to a function ?
# prepare thresholds for plotting
thr_set1 = np.arange(
min(kwargs["roc_thresholds"]),
max(kwargs["roc_thresholds"]),
0.01,
thr_set1 = np.linspace(
start=min(kwargs["roc_thresholds"]),
stop=max(kwargs["roc_thresholds"]),
num=1000,
)
thr_set2 = np.arange(
min(kwargs["pr_thresholds"]),
max(kwargs["pr_thresholds"]),
0.01,
thr_set2 = np.linspace(
start=min(kwargs["pr_thresholds"]),
stop=max(kwargs["pr_thresholds"]),
num=1000,
)
f1_list = [
2
Expand Down Expand Up @@ -552,7 +552,7 @@ def plot_regression_metrics(
freqs, _, _ = ax5.hist(
kwargs["y_ratio"],
histtype="bar",
bins=np.arange(0.75, 1.25, 0.01),
bins=np.arange(0.75, 1.25, 0.05),
alpha=1.0,
color="#B3C3F3",
edgecolor="navy",
Expand Down
14 changes: 8 additions & 6 deletions tests/slickml/classification/test_glmnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def test_glmnetcvclassifier__passes__with_defaults_and_no_test_targets(
assert_that(coeff_path_fig).is_instance_of(Figure)
assert_that(shap_waterfall_test_fig).is_instance_of(Figure)
assert_that(shap_waterfall_train_fig).is_instance_of(Figure)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), 0.00529, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), 0.01112, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), -0.01119, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), -0.00536, decimal=5)

@pytest.mark.parametrize(
("clf_train_test_x_y"),
Expand Down Expand Up @@ -257,8 +257,8 @@ def test_glmnetcvclassifier__passes__with_defaults(
assert_that(coeff_path_fig).is_instance_of(Figure)
assert_that(shap_waterfall_test_fig).is_instance_of(Figure)
assert_that(shap_waterfall_train_fig).is_instance_of(Figure)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), 0.00529, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), 0.01112, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_test_), -0.01119, decimal=5)
npt.assert_almost_equal(np.mean(clf.shap_values_train_), -0.00536, decimal=5)

# TODO(amir): add a test for `lambda_path` parameter
@pytest.mark.parametrize(
Expand Down Expand Up @@ -499,13 +499,15 @@ def test_glmnetcvclassifier_plots__passes__with_valid_save_paths(
path=coeff_path_fig_path,
expected_size=(1627, 930),
)
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to("shap_waterfall_fig.png")
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to(
"shap_waterfall_fig.png",
)
_validate_figure_type_and_size(
path=shap_waterfall_fig_path,
expected_size=(1375, 974),
)
assert_that(shap_summary_fig_path.parts[-1]).is_equal_to("shap_summary_fig.png")
_validate_figure_type_and_size(
path=shap_summary_fig_path,
expected_size=(1474, 760),
expected_size=(1472, 757),
)
10 changes: 7 additions & 3 deletions tests/slickml/classification/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,18 +465,22 @@ def test_xgboostclassifier_plots__passes__with_valid_save_paths(
display_plot=False,
)

assert_that(feature_importance_fig_path.parts[-1]).is_equal_to("feature_importance_fig.png")
assert_that(feature_importance_fig_path.parts[-1]).is_equal_to(
"feature_importance_fig.png",
)
_validate_figure_type_and_size(
path=feature_importance_fig_path,
expected_size=(1395, 943),
)
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to("shap_waterfall_fig.png")
assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to(
"shap_waterfall_fig.png",
)
_validate_figure_type_and_size(
path=shap_waterfall_fig_path,
expected_size=(1391, 974),
)
assert_that(shap_summary_fig_path.parts[-1]).is_equal_to("shap_summary_fig.png")
_validate_figure_type_and_size(
path=shap_summary_fig_path,
expected_size=(1474, 760),
expected_size=(1472, 757),
)
Loading

0 comments on commit 780fb96

Please sign in to comment.