fix stacking classifiers

rasbt · rasbt · commit 0eedb1aaaa7d · 2024-03-30T08:42:19.000-05:00
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -7,6 +7,20 @@ The CHANGELOG for the current development version is available at
 
 ---
 
+### Version 0.23.2 (TBD)
+
+##### Downloads
+
+- [Source code (zip)](https://github.com/rasbt/mlxtend/archive/v0.23.2.zip)
+
+- [Source code (tar.gz)](https://github.com/rasbt/mlxtend/archive/v0.23.2.tar.gz)
+
+##### Changes
+
+- Add `classes_` attribute to stacking classifiers for compatibility with scikit-learn 1.3 ([#1091](https://github.com/rasbt/mlxtend/issues/1091))
+
+
+
 ### Version 0.23.1 (5 Jan 2024)
 
 ##### Downloads
diff --git a/mlxtend/__init__.py b/mlxtend/__init__.py
@@ -4,4 +4,4 @@
 #
 # License: BSD 3 clause
 
-__version__ = "0.23.1"
+__version__ = "0.23.2dev"
diff --git a/mlxtend/classifier/stacking_classification.py b/mlxtend/classifier/stacking_classification.py
@@ -13,6 +13,7 @@
 import numpy as np
 from scipy import sparse
 from sklearn.base import TransformerMixin, clone
+from sklearn.preprocessing import LabelEncoder
 
 from ..externals.estimator_checks import check_is_fitted
 from ..externals.name_estimators import _name_estimators
@@ -95,6 +96,9 @@ class StackingClassifier(_BaseXComposition, _BaseStackingClassifier, Transformer
         Fitted classifiers (clones of the original classifiers)
     meta_clf_ : estimator
         Fitted meta-classifier (clone of the original meta-estimator)
+    classes_ : ndarray of shape (n_classes,) or list of ndarray
+        Class labels. A list with one array per output column when
+        `y` is 2-dimensional (e.g. of type `"multilabel-indicator"`).
     train_meta_features : numpy array, shape = [n_samples, n_classifiers]
         meta-features for training data, where n_samples is the
         number of samples
@@ -175,6 +179,13 @@ def fit(self, X, y, sample_weight=None):
             self.clfs_ = self.classifiers
             self.meta_clf_ = self.meta_classifier
 
+        if y.ndim > 1:
+            self._label_encoder = [LabelEncoder().fit(yk) for yk in y.T]
+            self.classes_ = [le.classes_ for le in self._label_encoder]
+        else:
+            self._label_encoder = LabelEncoder().fit(y)
+            self.classes_ = self._label_encoder.classes_
+
         if self.fit_base_estimators:
             if self.verbose > 0:
                 print("Fitting %d classifiers..." % (len(self.classifiers)))
diff --git a/mlxtend/classifier/stacking_cv_classification.py b/mlxtend/classifier/stacking_cv_classification.py
@@ -14,6 +14,7 @@
 from sklearn.base import TransformerMixin, clone
 from sklearn.model_selection import cross_val_predict
 from sklearn.model_selection._split import check_cv
+from sklearn.preprocessing import LabelEncoder
 
 from ..externals.estimator_checks import check_is_fitted
 from ..externals.name_estimators import _name_estimators
@@ -129,6 +130,9 @@ class StackingCVClassifier(
         Fitted classifiers (clones of the original classifiers)
     meta_clf_ : estimator
         Fitted meta-classifier (clone of the original meta-estimator)
+    classes_ : ndarray of shape (n_classes,) or list of ndarray
+        Class labels. A list with one array per output column when
+        `y` is 2-dimensional (e.g. of type `"multilabel-indicator"`).
     train_meta_features : numpy array, shape = [n_samples, n_classifiers]
         meta-features for training data, where n_samples is the
         number of samples
@@ -220,6 +224,13 @@ def fit(self, X, y, groups=None, sample_weight=None):
         if self.verbose > 0:
             print("Fitting %d classifiers..." % (len(self.classifiers)))
 
+        if y.ndim > 1:
+            self._label_encoder = [LabelEncoder().fit(yk) for yk in y.T]
+            self.classes_ = [le.classes_ for le in self._label_encoder]
+        else:
+            self._label_encoder = LabelEncoder().fit(y)
+            self.classes_ = self._label_encoder.classes_
+
         final_cv = check_cv(self.cv, y, classifier=self.stratify)
         if isinstance(self.cv, int):
             # Override shuffle parameter in case of self generated
diff --git a/mlxtend/classifier/tests/test_stacking_classifier.py b/mlxtend/classifier/tests/test_stacking_classifier.py
@@ -33,7 +33,6 @@
 y2 = np.c_[y, y]
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_StackingClassifier():
     np.random.seed(123)
     meta = LogisticRegression(
@@ -162,7 +161,6 @@ def test_weight_unsupported_no_weight():
     sclf.fit(X, y)
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_StackingClassifier_proba_avg_1():
     np.random.seed(123)
     meta = LogisticRegression(solver="liblinear", multi_class="ovr", random_state=1)
@@ -177,7 +175,6 @@ def test_StackingClassifier_proba_avg_1():
     assert scores_mean == 0.93, scores_mean
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_StackingClassifier_proba_concat_1():
     np.random.seed(123)
     meta = LogisticRegression(solver="liblinear", multi_class="ovr")
@@ -325,7 +322,6 @@ def test_gridsearch_enumerate_names():
     grid = grid.fit(X, y)
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_use_probas():
     np.random.seed(123)
     meta = LogisticRegression(solver="liblinear", multi_class="ovr")
@@ -391,7 +387,6 @@ def test_verbose():
     sclf.fit(X, y)
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_use_features_in_secondary_predict():
     np.random.seed(123)
     X, y = iris_data()
@@ -424,7 +419,6 @@ def test_use_features_in_secondary_predict_proba():
     np.testing.assert_almost_equal(y_pred, expect, 3)
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_use_features_in_secondary_sparse_input_predict():
     np.random.seed(123)
     X, y = iris_data()
@@ -537,7 +531,6 @@ def test_clone():
     clone(stclf)
 
 
-@pytest.mark.skip(reason="scikit-learn implemented a StackingClassifier in 0.22.")
 def test_decision_function():
     np.random.seed(123)
 
@@ -572,7 +565,7 @@ def test_decision_function():
     if Version(sklearn_version) < Version("0.22"):
         assert scores_mean == 0.95, scores_mean
     else:
-        assert scores_mean == 0.94, scores_mean
+        assert scores_mean == 0.93, scores_mean
 
 
 def test_drop_col_unsupported():
diff --git a/mlxtend/classifier/tests/test_stacking_cv_classifier.py b/mlxtend/classifier/tests/test_stacking_cv_classifier.py
@@ -40,9 +40,6 @@
 X_breast, y_breast = breast_cancer.data[:, 1:3], breast_cancer.target
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
 def test_StackingCVClassifier():
     np.random.seed(123)
     meta = LogisticRegression(multi_class="ovr", solver="liblinear")
@@ -174,9 +171,7 @@ def test_no_weight_support_with_no_weight():
     sclf.fit(X_iris, y_iris)
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
+
 def test_StackingClassifier_proba():
     np.random.seed(12)
     meta = LogisticRegression(multi_class="ovr", solver="liblinear")
@@ -245,9 +240,6 @@ def test_gridsearch_enumerate_names():
     grid = grid.fit(X, y)
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
 def test_use_probas():
     np.random.seed(123)
     meta = LogisticRegression(multi_class="ovr", solver="liblinear")
@@ -262,9 +254,6 @@ def test_use_probas():
     assert scores_mean == 0.94, scores_mean
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
 def test_use_features_in_secondary():
     np.random.seed(123)
     meta = LogisticRegression(multi_class="ovr", solver="liblinear")
@@ -282,9 +271,6 @@ def test_use_features_in_secondary():
     assert scores_mean == 0.93, scores_mean
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
 def test_do_not_stratify():
     meta = LogisticRegression(multi_class="ovr", solver="liblinear")
     clf1 = RandomForestClassifier(n_estimators=10)
@@ -298,9 +284,6 @@ def test_do_not_stratify():
     assert scores_mean == 0.93, scores.mean()
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
 def test_cross_validation_technique():
     # This is like the `test_do_not_stratify` but instead
     # autogenerating the cross validation strategy it provides
@@ -640,9 +623,6 @@ def test_works_with_df_if_fold_indexes_missing():
         )
 
 
-@pytest.mark.skip(
-    reason="scikit-learn implemented a StackingClassifier in 0.22. It has built-in cross-validation."
-)
 def test_decision_function():
     np.random.seed(123)
 

Original file line number	Diff line number	Diff line change
`@@ -4,4 +4,4 @@`
`4`	`4`	`#`
`5`	`5`	`# License: BSD 3 clause`
`6`	`6`
`7`		`-__version__ = "0.23.1"`
	`7`	`+__version__ = "0.23.2dev"`