From 7a8fae404364dd3bec70bf4d3c3902bf18a76323 Mon Sep 17 00:00:00 2001
From: Reid Johnson
Date: Tue, 12 Nov 2024 01:48:27 -0600
Subject: [PATCH] scikit-learn 1.6 Compatibility (#105)

* Fixes for scikit-learn 1.6
---
 quantile_forest/_quantile_forest.py           |  47 +++++++-
 quantile_forest/tests/test_quantile_forest.py | 112 +++++++++++-------
 2 files changed, 111 insertions(+), 48 deletions(-)

diff --git a/quantile_forest/_quantile_forest.py b/quantile_forest/_quantile_forest.py
index da04f5e..c9f25bd 100755
--- a/quantile_forest/_quantile_forest.py
+++ b/quantile_forest/_quantile_forest.py
@@ -30,6 +30,7 @@ class calls the ``fit`` method of the ``ForestRegressor`` and creates a
 
 import joblib
 import numpy as np
+import sklearn
 from sklearn.ensemble._forest import (
     ForestRegressor,
     _generate_sample_indices,
@@ -38,11 +39,19 @@ class calls the ``fit`` method of the ``ForestRegressor`` and creates a
 from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
 from sklearn.tree._tree import DTYPE
 from sklearn.utils._param_validation import Interval, RealNotInt
+from sklearn.utils.fixes import parse_version
 from sklearn.utils.validation import check_is_fitted
 
+try:
+    from sklearn.utils.validation import validate_data
+except ImportError:
+    validate_data = None
+
 from ._quantile_forest_fast import QuantileForest
 from ._utils import generate_unsampled_indices, group_indices_by_value, map_indices_to_leaves
 
+sklearn_version = parse_version(sklearn.__version__)
+
 
 class BaseForestQuantileRegressor(ForestRegressor):
     """Base class for quantile regression forests.
@@ -132,9 +141,23 @@ def fit(self, X, y, sample_weight=None, sparse_pickle=False):
         )
 
         super(BaseForestQuantileRegressor, self).fit(X, y, sample_weight=sample_weight)
-        X, y = self._validate_data(
-            X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE, force_all_finite=False
-        )
+
+        validation_params = {
+            "X": X,
+            "y": y,
+            "multi_output": True,
+            "accept_sparse": "csc",
+            "dtype": DTYPE,
+            (
+                "force_all_finite"
+                if sklearn_version < parse_version("1.6.dev0")
+                else "ensure_all_finite"
+            ): False,
+        }
+        if validate_data is None:
+            X, y = self._validate_data(**validation_params)
+        else:
+            X, y = validate_data(self, **validation_params)
 
         if y.ndim == 1:
             y = np.expand_dims(y, axis=1)
@@ -816,7 +839,23 @@ def quantile_ranks(
             Quantile ranks in range [0, 1].
         """
         check_is_fitted(self)
-        X, y = self._validate_data(X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE)
+
+        validation_params = {
+            "X": X,
+            "y": y,
+            "multi_output": True,
+            "accept_sparse": "csc",
+            "dtype": DTYPE,
+            (
+                "force_all_finite"
+                if sklearn_version < parse_version("1.6.dev0")
+                else "ensure_all_finite"
+            ): False,
+        }
+        if validate_data is None:
+            X, y = self._validate_data(**validation_params)
+        else:
+            X, y = validate_data(self, **validation_params)
 
         if not isinstance(kind, (bytes, bytearray)):
             kind = kind.encode()
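The shim above is the heart of the patch: scikit-learn 1.6 renames the `force_all_finite` validation argument to `ensure_all_finite` and moves estimator input validation from the private `BaseEstimator._validate_data` method to the public `sklearn.utils.validation.validate_data` function. Below is a minimal, self-contained sketch of the same pattern in isolation; `TinyEstimator` and `finite_kwarg` are illustrative names, not part of the patch:

```python
# Illustrative sketch only; mirrors the compatibility shim in the hunks above.
import sklearn
from sklearn.base import BaseEstimator
from sklearn.utils.fixes import parse_version

try:
    # Public function introduced in scikit-learn 1.6.
    from sklearn.utils.validation import validate_data
except ImportError:
    validate_data = None  # older scikit-learn: fall back to the method

sklearn_version = parse_version(sklearn.__version__)

# scikit-learn 1.6 renamed `force_all_finite` to `ensure_all_finite`.
finite_kwarg = (
    "force_all_finite"
    if sklearn_version < parse_version("1.6.dev0")
    else "ensure_all_finite"
)


class TinyEstimator(BaseEstimator):
    def fit(self, X, y):
        params = {"multi_output": True, finite_kwarg: False}
        if validate_data is None:
            X, y = self._validate_data(X, y, **params)  # pre-1.6 API
        else:
            X, y = validate_data(self, X, y, **params)  # 1.6+ API
        return self


# NaN passes because (force|ensure)_all_finite is False on both paths.
TinyEstimator().fit([[1.0], [2.0], [float("nan")]], [0.0, 1.0, 2.0])
```

Keying the dict on a conditionally chosen string keeps a single call site working on both sides of the rename, which is why the patch builds `validation_params` instead of branching the whole call.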
""" check_is_fitted(self) - X, y = self._validate_data(X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE) + + validation_params = { + "X": X, + "y": y, + "multi_output": True, + "accept_sparse": "csc", + "dtype": DTYPE, + ( + "force_all_finite" + if sklearn_version < parse_version("1.6.dev0") + else "ensure_all_finite" + ): False, + } + if validate_data is None: + X, y = self._validate_data(**validation_params) + else: + X, y = validate_data(self, **validation_params) if not isinstance(kind, (bytes, bytearray)): kind = kind.encode() diff --git a/quantile_forest/tests/test_quantile_forest.py b/quantile_forest/tests/test_quantile_forest.py index 9536d17..4b89858 100755 --- a/quantile_forest/tests/test_quantile_forest.py +++ b/quantile_forest/tests/test_quantile_forest.py @@ -18,7 +18,6 @@ assert_almost_equal, assert_array_almost_equal, assert_array_equal, - assert_raises, ) from sklearn.utils.validation import check_is_fitted, check_random_state @@ -264,7 +263,8 @@ def check_predict_quantiles_toy(name): weighted_leaves=False, oob_score=oob_score, ) - assert_raises(AssertionError, assert_allclose, y_pred1, y_pred2) + with pytest.raises(AssertionError): + assert_allclose(y_pred1, y_pred2) # Check that leaf weighting without weighted quantiles does nothing. y_pred1 = est.predict( @@ -579,8 +579,10 @@ def check_predict_quantiles( assert np.any(y_pred_1 != y_pred_2) # Check error if invalid quantiles. - assert_raises(ValueError, est.predict, X_test, -0.01) - assert_raises(ValueError, est.predict, X_test, 1.01) + with pytest.raises(ValueError): + est.predict(X_test, -0.01) + with pytest.raises(ValueError): + est.predict(X_test, 1.01) @pytest.mark.parametrize("name", FOREST_REGRESSORS) @@ -720,7 +722,8 @@ def check_quantile_ranks(name): # Check error if training and test number of targets are not equal. est.fit(X_train, y_train[:, 0]) # training target size = 1 - assert_raises(ValueError, est.quantile_ranks, X_test, y_test[:, :2]) # test target size = 2 + with pytest.raises(ValueError): + est.quantile_ranks(X_test, y_test[:, :2]) # test target size = 2 @pytest.mark.parametrize("name", FOREST_REGRESSORS) @@ -773,10 +776,12 @@ def check_proximity_counts(name): assert_array_equal([len(p) for p in proximities], [len(e) for e in expected]) # Check error if `max_proximities` < 1. - assert_raises(ValueError, est.proximity_counts, X, max_proximities=0) + with pytest.raises(ValueError): + est.proximity_counts(X, max_proximities=0) # Check error if `max_proximities` is a float. - assert_raises(ValueError, est.proximity_counts, X, max_proximities=1.5) + with pytest.raises(ValueError): + est.proximity_counts(X, max_proximities=1.5) # Check that proximity counts match expected counts without splits. est = ForestRegressor( @@ -869,14 +874,25 @@ def check_max_samples_leaf(name): for param_validation in [True, False]: est = ForestRegressor(n_estimators=1, max_samples_leaf=max_samples_leaf) est.param_validation = param_validation - assert_raises(ValueError, est.fit, X, y) + with pytest.raises(ValueError): + est.fit(X, y) est.max_samples_leaf = max_samples_leaf - assert_raises(ValueError, est._get_y_train_leaves, X, y) + with pytest.raises(ValueError): + est._get_y_train_leaves(X, y) @pytest.mark.parametrize("name", FOREST_REGRESSORS) def test_max_samples_leaf(name): check_max_samples_leaf(name) + """ + Test that `max_samples_leaf` is correctly passed to the `fit` method, + and that it results in the correct maximum leaf size. 
@@ -1065,16 +1081,16 @@ def check_predict_oob(
     assert_allclose(y_pred_oob1, y_pred_oob2)
 
     # Check error if OOB score without `indices` does not match training count.
-    assert_raises(ValueError, est.predict, X[:1], oob_score=True)
+    with pytest.raises(ValueError):
+        est.predict(X[:1], oob_score=True)
 
     # Check error if OOB score with `indices` does not match sample count.
-    assert_raises(
-        ValueError,
-        est.predict,
-        X,
-        oob_score=True,
-        indices=-np.ones(len(X) - 1),
-    )
+    with pytest.raises(ValueError):
+        est.predict(
+            X,
+            oob_score=True,
+            indices=-np.ones(len(X) - 1),
+        )
 
     # Check warning if not enough estimators.
     with np.errstate(divide="ignore", invalid="ignore"):
@@ -1106,14 +1122,13 @@ def check_predict_oob(
     # Check error if no bootstrapping.
     est = ForestRegressor(n_estimators=1, bootstrap=False)
     est.fit(X, y)
-    assert_raises(
-        ValueError,
-        est.predict,
-        X,
-        weighted_quantile=weighted_quantile,
-        aggregate_leaves_first=aggregate_leaves_first,
-        oob_score=True,
-    )
+    with pytest.raises(ValueError):
+        est.predict(
+            X,
+            weighted_quantile=weighted_quantile,
+            aggregate_leaves_first=aggregate_leaves_first,
+            oob_score=True,
+        )
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         assert np.all(est._get_unsampled_indices(est.estimators_[0]) == np.array([]))
@@ -1121,15 +1136,14 @@ def check_predict_oob(
     # Check error if numbers of scoring and training samples are different.
     est = ForestRegressor(n_estimators=1, bootstrap=True)
     est.fit(X, y)
-    assert_raises(
-        ValueError,
-        est.predict,
-        X[:1],
-        y[:1],
-        weighted_quantile=weighted_quantile,
-        aggregate_leaves_first=aggregate_leaves_first,
-        oob_score=True,
-    )
+    with pytest.raises(ValueError):
+        est.predict(
+            X[:1],
+            y[:1],
+            weighted_quantile=weighted_quantile,
+            aggregate_leaves_first=aggregate_leaves_first,
+            oob_score=True,
+        )
 
 
 @pytest.mark.parametrize("name", FOREST_REGRESSORS)
@@ -1200,12 +1214,14 @@ def check_quantile_ranks_oob(name):
     # Check error if no bootstrapping.
     est = ForestRegressor(n_estimators=1, bootstrap=False)
     est.fit(X, y)
-    assert_raises(ValueError, est.quantile_ranks, X, y, oob_score=True)
+    with pytest.raises(ValueError):
+        est.quantile_ranks(X, y, oob_score=True)
 
     # Check error if numbers of scoring and training samples are different.
     est = ForestRegressor(n_estimators=1, bootstrap=True)
     est.fit(X, y)
-    assert_raises(ValueError, est.quantile_ranks, X[:1], y[:1], oob_score=True)
+    with pytest.raises(ValueError):
+        est.quantile_ranks(X[:1], y[:1], oob_score=True)
 
 
 @pytest.mark.parametrize("name", FOREST_REGRESSORS)
@@ -1284,7 +1300,8 @@ def check_proximity_counts_oob(name):
     # Check error if no bootstrapping.
     est = ForestRegressor(n_estimators=1, max_samples_leaf=None, bootstrap=False)
     est.fit(X, y)
-    assert_raises(ValueError, est.proximity_counts, X, oob_score=True)
+    with pytest.raises(ValueError):
+        est.proximity_counts(X, oob_score=True)
 
 
 @pytest.mark.parametrize("name", FOREST_REGRESSORS)
@@ -1357,7 +1374,8 @@ def check_monotonic_constraints(name, max_samples_leaf):
         max_leaf_nodes=n_samples_train,
         bootstrap=True,
     )
-    assert_raises(ValueError, est.fit, X_train, y_train)
+    with pytest.raises(ValueError):
+        est.fit(X_train, y_train)
 
 
 @pytest.mark.parametrize("name", FOREST_REGRESSORS)
@@ -1466,8 +1484,10 @@ def test_calc_quantile():
     assert actual1 != actual2
 
     # Check error if invalid parameters.
-    assert_raises(TypeError, calc_quantile, [1, 2], 0.5)
-    assert_raises(TypeError, calc_quantile, [1, 2], [0.5], interpolation=None)
+    with pytest.raises(TypeError):
+        calc_quantile([1, 2], 0.5)
+    with pytest.raises(TypeError):
+        calc_quantile([1, 2], [0.5], interpolation=None)
 
 
 def test_calc_weighted_quantile():
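For context on the OOB assertions above: out-of-bag predictions are only defined for the samples the forest was fitted on, which is why shortening `X` or disabling bootstrapping must raise `ValueError`. A hedged usage sketch, assuming the package's public `RandomForestQuantileRegressor` and the `oob_score` predict flag exercised by these tests:

```python
import numpy as np
from quantile_forest import RandomForestQuantileRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 2))
y = X[:, 0] + 0.1 * rng.normal(size=100)

# Bootstrapping must be enabled for any sample to be out-of-bag.
est = RandomForestQuantileRegressor(n_estimators=25, bootstrap=True)
est.fit(X, y)

# OOB scoring over the full training set: each row is predicted only by
# trees that did not see it during fitting.
y_oob = est.predict(X, quantiles=0.5, oob_score=True)

# est.predict(X[:1], oob_score=True)  # ValueError: size mismatch (tested above)
# RandomForestQuantileRegressor(bootstrap=False).fit(X, y).predict(
#     X, oob_score=True)              # ValueError: no OOB samples (tested above)
```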
@@ -1585,8 +1605,10 @@ def _dicts_to_input_pairs(input_dicts):
     assert actual1 != actual2
 
     # Check error if invalid parameters.
-    assert_raises(TypeError, calc_weighted_quantile, [1, 2], [1, 1], 0.5)
-    assert_raises(TypeError, calc_weighted_quantile, [1, 2], [1, 1], [0.5], interpolation=None)
+    with pytest.raises(TypeError):
+        calc_weighted_quantile([1, 2], [1, 1], 0.5)
+    with pytest.raises(TypeError):
+        calc_weighted_quantile([1, 2], [1, 1], [0.5], interpolation=None)
 
 
 def test_calc_quantile_rank():
@@ -1635,5 +1657,7 @@ def test_calc_quantile_rank():
     assert actual1 != actual2
 
     # Check error if invalid parameters.
-    assert_raises(TypeError, calc_quantile_rank, [1, 2], [1])
-    assert_raises(TypeError, calc_quantile_rank, [1, 2], float(1), kind=None)
+    with pytest.raises(TypeError):
+        calc_quantile_rank([1, 2], [1])
+    with pytest.raises(TypeError):
+        calc_quantile_rank([1, 2], float(1), kind=None)
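A quick way to sanity-check the whole patch against both API generations is a fit/predict round trip that also reports which validation path is in use. A sketch, assuming the patched package is installed alongside either scikit-learn 1.5.x or 1.6.x; the shape comment assumes the usual `(n_samples, n_quantiles)` output for a single target:

```python
import numpy as np
import sklearn
from quantile_forest import RandomForestQuantileRegressor

try:
    from sklearn.utils.validation import validate_data  # 1.6+ only
    path = "validate_data (scikit-learn >= 1.6)"
except ImportError:
    path = "BaseEstimator._validate_data (scikit-learn < 1.6)"

print(f"scikit-learn {sklearn.__version__}: using {path}")

X = np.random.RandomState(0).normal(size=(50, 3))
y = X.sum(axis=1)
est = RandomForestQuantileRegressor(n_estimators=10).fit(X, y)
print(est.predict(X[:3], quantiles=[0.1, 0.5, 0.9]).shape)  # expected (3, 3)
```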