Update deps, remove python v3.8, and adapt unit-test based on API changes ...
slickml · Jul 17, 2024 · 780fb96
1 parent 8b0d7d7
commit 780fb96
Show file tree

Hide file tree

Showing 18 changed files with 2,626 additions and 2,614 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -9,7 +9,7 @@ omit =
 
 [report]
 show_missing = True
-fail_under = 90
+fail_under = 95
 
 [html]
 directory = htmlcov

diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -20,7 +20,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest"]
-        python-version: ["3.8"]
+        python-version: ["3.9"]
     steps:
       #----------------------------------------------
       # ----- check-out repo and set-up python -----

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -21,13 +21,12 @@ jobs:
     #----------------------------------------------
     runs-on: ${{ matrix.os }}
     strategy:
-      # TODO(amir): currently this is `false` so we can use the `poetry-cache` of `python v3.8`
-      # once the stupid `glmnet` dependency is resolved, change `fail-fast = True`
-      fail-fast: false
+      fail-fast: true
       matrix:
         # TODO(amir): enable `windows-latest`, `macos-latest` and fix possible `poetry` issues and glmnet
+        # TODO(amir): add `"3.12"` once the glmnet wheel is released
         os: ["ubuntu-latest"]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
     steps:
       #----------------------------------------------
       # ----- check-out repo and set-up python -----
@@ -55,15 +54,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: .venv
-          # TODO(amir): here, we explicitly set the key independent of what `python-version` we are running
-          # the main issue is with `glmnet` that does not currently support `python v3.9 and 3.10`
-          # therefore, all the CI jobs for those python versions failed at first, then we re-run the
-          # jobs, the cached venv using `python v3.8` will be retrieved and the jobs will run successfully
-          # ideally, we should be able to add `python-versions` here to distinguish between caches
-          # key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
-          # NOTE: `glmnet` has not been updated since 2020; trying to build it on-the-fly
-          # https://github.com/civisanalytics/python-glmnet/issues/79
-          key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
+          key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
       #----------------------------------------------
       #  -----  install dependencies  -----
       #----------------------------------------------

diff --git a/examples/quick-starts/metrics/BinaryClassificationMetrics.ipynb b/examples/quick-starts/metrics/BinaryClassificationMetrics.ipynb
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,98 +52,97 @@ exclude = [
 
 
 [tool.poetry.dependencies]
-python = ">=3.8,<3.12"
+python = ">=3.9,<3.13"
 
 # --- scientific-computing ---
-# TODO(amir): since numpy "^1.23" distutils is deprecated which glmnet needs it
-# wait for `glmnet` for a new release; then upgrade this 
-numpy = "^1.22,<1.23"
-pandas = "^1.5"
-scipy = "^1.9"
-statsmodels = "^0.13"
+numpy = ">=1.22,<2.0"
+pandas = ">=1.4,<2.0"
+scipy = ">=1.9"
+statsmodels = ">=0.13"
 
 # --- machine-learning ---
-scikit-learn = "^1.1"
-xgboost = "^1.7"
-glmnet = "^2.2"
-shap = "^0.41"
+scikit-learn = ">=1.1"
+xgboost = ">=1.7,<2.0"
+python-glmnet = ">=2.2"
+shap = ">=0.46"
 
 # --- optimization ----
-bayesian-optimization = "^1.2"
-hyperopt = "^0.2"
+bayesian-optimization = ">=1.2"
+hyperopt = ">=0.2"
 
 # --- visualization ---
 # TODO(amir): matplotlib v>=3.6 comes with breaking changes
 # make sure to apply the upgrade and fix the issues once the API refactor is done
-matplotlib = "^3.5,<3.6"
-seaborn = "^0.12"
+matplotlib = ">=3.5"
+seaborn = ">=0.12"
 
 # --- command-lines ---
-click = "^8.1"
+click = ">=8.1"
 
 
 [tool.poetry.group.dev.dependencies]
 
 # --- package-management ---
-pip = "^22.3"
+pip = ">=22.3"
+setuptools = ">=70.3"
 
 # --- task-management ---
-poethepoet = "^0.16"
+poethepoet = ">=0.16"
 
 # --- testenv-management ---
-tox = "^3.28"
+tox = ">=3.28"
 
 # --- formatting ---
-add-trailing-comma = "^2.4"
-isort = "^5.11"
-black = "^22.12"
-jupyter-black = "^0.3"
+add-trailing-comma = ">=2.4"
+isort = ">=5.11"
+black = ">=22.12"
+jupyter-black = ">=0.3"
 
 # --- linting ---
-flake8 = "^5.0"
-flake8-commas = "^2.1"
-flake8-annotations = "^2.9"
-flake8-comprehensions = "^3.10"
-flake8-eradicate = "^1.4"
-flake8-simplify = "^0.19"
-flake8-tidy-imports = "^4.8"
-flake8-type-checking = "^2.3"
-flake8-typing-imports = "^1.12"
-flake8-use-fstring = "^1.4"
-pep8-naming = "^0.13"
+flake8 = ">=5.0"
+flake8-commas = ">=4.0"
+flake8-annotations = ">=2.9"
+flake8-comprehensions = ">=3.10"
+flake8-eradicate = ">=1.4"
+flake8-simplify = ">=0.19"
+flake8-tidy-imports = ">=4.8"
+flake8-type-checking = ">=2.3"
+flake8-typing-imports = ">=1.12"
+flake8-use-fstring = ">=1.4"
+pep8-naming = ">=0.13"
 
 # --- type-checking ---
-mypy = "^0.991"
-pandas-stubs = "^1.5"
-data-science-types = "^0.2"
+mypy = ">=0.991"
+pandas-stubs = ">=1.5"
+data-science-types = ">=0.2"
 
 # --- unit-testing ---
-pytest = "^7.2"
-pytest-cov = "^4.0"
-pytest-mock = "^3.10"
-mock = "^4.0"
-coverage = "^6.5"
-assertpy = "^1.1"
+pytest = ">=7.2"
+pytest-cov = ">=4.0"
+pytest-mock = ">=3.10"
+mock = ">=4.0"
+coverage = ">=6.5"
+assertpy = ">=1.1"
 
 # --- docs ----
-sphinx = "^5.3"
-sphinx-autoapi = "^1.9"
-sphinx_design = "^0.3"
-myst-parser = "^0.18"
-furo = "^2022.9"
+sphinx = ">=5.3"
+sphinx-autoapi = ">=1.9"
+sphinx_design = ">=0.3"
+myst-parser = ">=0.18"
+furo = ">=2022.9"
 
 # --- jupyter ---
-ipykernel = "^6.20"
-jupytext = "^1.14"
+ipykernel = ">=6.29"
+jupytext = ">=1.14"
 
 # --- monitoring ---
-watchdog = "^2.1"
+watchdog = ">=2.1"
 
 # --- image manipulation ---
-pillow = "^9.3.0"
+pillow = ">=9.3"
 
 [build-system]
-requires = ["poetry-core>=1.4.0"]
+requires = ["poetry-core>=1.9.0"]
 build-backend = "poetry.core.masonry.api"
 
 

diff --git a/src/slickml/metrics/_regression.py b/src/slickml/metrics/_regression.py
@@ -450,12 +450,14 @@ def _metrics_dict(self) -> Dict[str, Optional[float]]:
                 number=self.mse_,
                 ndigits=self.precision_digits,
             ),
-            "Mean Squared Log Error": round(
-                number=self.msle_,
-                ndigits=self.precision_digits,
-            )
-            if self.msle_
-            else None,
+            "Mean Squared Log Error": (
+                round(
+                    number=self.msle_,
+                    ndigits=self.precision_digits,
+                )
+                if self.msle_
+                else None
+            ),
             "Mean Absolute Percentage Error": round(
                 number=self.mape_,
                 ndigits=self.precision_digits,

diff --git a/src/slickml/optimization/_bayesianopt.py b/src/slickml/optimization/_bayesianopt.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import xgboost as xgb
 from bayes_opt import BayesianOptimization
+from bayes_opt.util import UtilityFunction
 
 from slickml.base import BaseXGBoostEstimator
 from slickml.utils import check_var
@@ -366,19 +367,23 @@ def _xgb_eval(
         self.optimizer_.maximize(
             init_points=self.n_init_iter,
             n_iter=self.n_iter,
-            acq=self.acquisition_criterion,
-            kappa=2.576,
-            kappa_decay=1,
-            kappa_decay_delay=0,
-            xi=0.0,
+            acquisition_function=UtilityFunction(
+                kind=self.acquisition_criterion,
+                kappa=2.576,
+                xi=0.0,
+                kappa_decay=1,
+                kappa_decay_delay=0,
+            ),
         )
         self.results_ = self.get_results()
         self.best_params_ = self.get_best_params()
         self.best_results_ = self.get_best_results()
 
         return None
 
-    def get_params_bounds(self) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
+    def get_params_bounds(
+        self,
+    ) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
         """Returns the hyper-parameters boundaries for the tuning process.
 
         Returns
@@ -447,7 +452,9 @@ def get_best_results(self) -> pd.DataFrame:
         cond = self.results_[self.metrics] == self.results_[self.metrics].max()
         return self.results_.loc[cond, :].reset_index(drop=True)
 
-    def _default_params_bounds(self) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
+    def _default_params_bounds(
+        self,
+    ) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
         """Default set of parameters when the class is being instantiated with ``params_bounds=None``.
 
         Notes
@@ -575,9 +582,13 @@ def _metrics_and_objectives_should_be_aligned(self) -> None:
         None
         """
         if self.metrics in self._clf_metrics() and self.objective not in self._clf_objectives():
-            raise ValueError("Classification metrics cannot be used with regression objectives.")
+            raise ValueError(
+                "Classification metrics cannot be used with regression objectives.",
+            )
 
         if self.metrics not in self._clf_metrics() and self.objective in self._clf_objectives():
-            raise ValueError("Regression metrics cannot be used with classification objectives.")
+            raise ValueError(
+                "Regression metrics cannot be used with classification objectives.",
+            )
 
         return None
diff --git a/src/slickml/visualization/_metrics.py b/src/slickml/visualization/_metrics.py
@@ -72,15 +72,15 @@ def plot_binary_classification_metrics(
 
     # TODO(amir): move this to a function ?
     # prepare thresholds for plotting
-    thr_set1 = np.arange(
-        min(kwargs["roc_thresholds"]),
-        max(kwargs["roc_thresholds"]),
-        0.01,
+    thr_set1 = np.linspace(
+        start=min(kwargs["roc_thresholds"]),
+        stop=max(kwargs["roc_thresholds"]),
+        num=1000,
     )
-    thr_set2 = np.arange(
-        min(kwargs["pr_thresholds"]),
-        max(kwargs["pr_thresholds"]),
-        0.01,
+    thr_set2 = np.linspace(
+        start=min(kwargs["pr_thresholds"]),
+        stop=max(kwargs["pr_thresholds"]),
+        num=1000,
     )
     f1_list = [
         2
@@ -552,7 +552,7 @@ def plot_regression_metrics(
     freqs, _, _ = ax5.hist(
         kwargs["y_ratio"],
         histtype="bar",
-        bins=np.arange(0.75, 1.25, 0.01),
+        bins=np.arange(0.75, 1.25, 0.05),
         alpha=1.0,
         color="#B3C3F3",
         edgecolor="navy",

diff --git a/tests/slickml/classification/test_glmnet.py b/tests/slickml/classification/test_glmnet.py
@@ -146,8 +146,8 @@ def test_glmnetcvclassifier__passes__with_defaults_and_no_test_targets(
         assert_that(coeff_path_fig).is_instance_of(Figure)
         assert_that(shap_waterfall_test_fig).is_instance_of(Figure)
         assert_that(shap_waterfall_train_fig).is_instance_of(Figure)
-        npt.assert_almost_equal(np.mean(clf.shap_values_test_), 0.00529, decimal=5)
-        npt.assert_almost_equal(np.mean(clf.shap_values_train_), 0.01112, decimal=5)
+        npt.assert_almost_equal(np.mean(clf.shap_values_test_), -0.01119, decimal=5)
+        npt.assert_almost_equal(np.mean(clf.shap_values_train_), -0.00536, decimal=5)
 
     @pytest.mark.parametrize(
         ("clf_train_test_x_y"),
@@ -257,8 +257,8 @@ def test_glmnetcvclassifier__passes__with_defaults(
         assert_that(coeff_path_fig).is_instance_of(Figure)
         assert_that(shap_waterfall_test_fig).is_instance_of(Figure)
         assert_that(shap_waterfall_train_fig).is_instance_of(Figure)
-        npt.assert_almost_equal(np.mean(clf.shap_values_test_), 0.00529, decimal=5)
-        npt.assert_almost_equal(np.mean(clf.shap_values_train_), 0.01112, decimal=5)
+        npt.assert_almost_equal(np.mean(clf.shap_values_test_), -0.01119, decimal=5)
+        npt.assert_almost_equal(np.mean(clf.shap_values_train_), -0.00536, decimal=5)
 
     # TODO(amir): add a test for `lambda_path` parameter
     @pytest.mark.parametrize(
@@ -499,13 +499,15 @@ def test_glmnetcvclassifier_plots__passes__with_valid_save_paths(
             path=coeff_path_fig_path,
             expected_size=(1627, 930),
         )
-        assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to("shap_waterfall_fig.png")
+        assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to(
+            "shap_waterfall_fig.png",
+        )
         _validate_figure_type_and_size(
             path=shap_waterfall_fig_path,
             expected_size=(1375, 974),
         )
         assert_that(shap_summary_fig_path.parts[-1]).is_equal_to("shap_summary_fig.png")
         _validate_figure_type_and_size(
             path=shap_summary_fig_path,
-            expected_size=(1474, 760),
+            expected_size=(1472, 757),
         )
diff --git a/tests/slickml/classification/test_xgboost.py b/tests/slickml/classification/test_xgboost.py
@@ -465,18 +465,22 @@ def test_xgboostclassifier_plots__passes__with_valid_save_paths(
             display_plot=False,
         )
 
-        assert_that(feature_importance_fig_path.parts[-1]).is_equal_to("feature_importance_fig.png")
+        assert_that(feature_importance_fig_path.parts[-1]).is_equal_to(
+            "feature_importance_fig.png",
+        )
         _validate_figure_type_and_size(
             path=feature_importance_fig_path,
             expected_size=(1395, 943),
         )
-        assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to("shap_waterfall_fig.png")
+        assert_that(shap_waterfall_fig_path.parts[-1]).is_equal_to(
+            "shap_waterfall_fig.png",
+        )
         _validate_figure_type_and_size(
             path=shap_waterfall_fig_path,
             expected_size=(1391, 974),
         )
         assert_that(shap_summary_fig_path.parts[-1]).is_equal_to("shap_summary_fig.png")
         _validate_figure_type_and_size(
             path=shap_summary_fig_path,
-            expected_size=(1474, 760),
+            expected_size=(1472, 757),
         )