Skip to content

Commit

Permalink
[enhancement] conform to sklearn1.6 validate_data API change (#2048)
Browse files Browse the repository at this point in the history

* initial commit

* temporary

* temp

* first attempt

* tracing now available

* latest ideas

* add skip

* updates

* first light

* save progress

* add validate_data test

* continued fixes

* fixes

* introduce necessary sklearn 1.6 changes

* formatting and swap to conformance

* add SPECIAL_INSTANCES

* formatting

* fix svm test failure

* add docs, namedtuple

* move back to basic python datatypes

* remove daal4py at source

* remove old code

* fix import

* swap to else
  • Loading branch information
icfaust authored Sep 24, 2024
1 parent 486b78c commit 9f5390c
Show file tree
Hide file tree
Showing 26 changed files with 516 additions and 106 deletions.
2 changes: 1 addition & 1 deletion onedal/svm/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _ovr_decision_function(self, predictions, confidences, n_classes):
def _decision_function(self, X, module, queue):
_check_is_fitted(self)
X = _check_array(
X, dtype=[np.float64, np.float32], force_all_finite=False, accept_sparse="csr"
X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse="csr"
)
_check_n_features(self, X, False)

Expand Down
7 changes: 6 additions & 1 deletion sklearnex/basic_statistics/basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
from .._device_offload import dispatch
from .._utils import PatchingConditionsChain

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data

if sklearn_check_version("1.2"):
from sklearn.utils._param_validation import StrOptions

Expand Down Expand Up @@ -145,7 +150,7 @@ def _onedal_fit(self, X, sample_weight=None, queue=None):
self._validate_params()

if sklearn_check_version("1.0"):
X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_2d=False)
X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_2d=False)
else:
X = check_array(X, dtype=[np.float64, np.float32])

Expand Down
10 changes: 8 additions & 2 deletions sklearnex/basic_statistics/incremental_basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@
import numbers
import warnings

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data


@control_n_jobs(decorated_methods=["partial_fit", "_onedal_finalize_fit"])
class IncrementalBasicStatistics(BaseEstimator):
Expand Down Expand Up @@ -145,7 +150,8 @@ def _onedal_partial_fit(self, X, sample_weight=None, queue=None):
first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
reset=first_pass,
Expand Down Expand Up @@ -180,7 +186,7 @@ def _onedal_fit(self, X, sample_weight=None, queue=None):
self._validate_params()

if sklearn_check_version("1.0"):
X = self._validate_data(X, dtype=[np.float64, np.float32])
X = validate_data(self, X, dtype=[np.float64, np.float32])
else:
X = check_array(X, dtype=[np.float64, np.float32])

Expand Down
7 changes: 6 additions & 1 deletion sklearnex/cluster/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
from sklearn.utils import check_scalar

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = sklearn_DBSCAN._validate_data


class BaseDBSCAN(ABC):
def _onedal_dbscan(self, **onedal_params):
Expand Down Expand Up @@ -85,7 +90,7 @@ def __init__(

def _onedal_fit(self, X, y, sample_weight=None, queue=None):
if sklearn_check_version("1.0"):
X = self._validate_data(X, force_all_finite=False)
X = validate_data(self, X, force_all_finite=False)

onedal_params = {
"eps": self.eps,
Expand Down
14 changes: 11 additions & 3 deletions sklearnex/cluster/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
from .._device_offload import dispatch, wrap_output_data
from .._utils import PatchingConditionsChain

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = sklearn_KMeans._validate_data

@control_n_jobs(decorated_methods=["fit", "predict", "transform", "fit_transform"])
class KMeans(sklearn_KMeans):
__doc__ = sklearn_KMeans.__doc__
Expand Down Expand Up @@ -150,7 +155,8 @@ def fit(self, X, y=None, sample_weight=None):
return self

def _onedal_fit(self, X, _, sample_weight, queue=None):
X = self._validate_data(
X = validate_data(
self,
X,
accept_sparse="csr",
dtype=[np.float64, np.float32],
Expand Down Expand Up @@ -271,7 +277,8 @@ def predict(
def _onedal_predict(self, X, sample_weight=None, queue=None):
check_is_fitted(self)

X = self._validate_data(
X = validate_data(
self,
X,
accept_sparse="csr",
reset=False,
Expand Down Expand Up @@ -335,7 +342,8 @@ def score(self, X, y=None, sample_weight=None):
def _onedal_score(self, X, y, sample_weight=None, queue=None):
check_is_fitted(self)

X = self._validate_data(
X = validate_data(
self,
X,
accept_sparse="csr",
reset=False,
Expand Down
19 changes: 15 additions & 4 deletions sklearnex/covariance/incremental_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
if sklearn_check_version("1.2"):
from sklearn.utils._param_validation import Interval

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data


@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
class IncrementalEmpiricalCovariance(BaseEstimator):
Expand Down Expand Up @@ -163,7 +168,8 @@ def _onedal_partial_fit(self, X, queue=None, check_input=True):
self._validate_params()

if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
reset=first_pass,
Expand Down Expand Up @@ -204,7 +210,8 @@ def score(self, X_test, y=None):

location = self.location_
if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X_test,
dtype=[np.float64, np.float32],
reset=False,
Expand Down Expand Up @@ -306,8 +313,12 @@ def _onedal_fit(self, X, queue=None):

# finite check occurs on onedal side
if sklearn_check_version("1.0"):
X = self._validate_data(
X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
copy=self.copy,
force_all_finite=False,
)
else:
X = check_array(
Expand Down
27 changes: 19 additions & 8 deletions sklearnex/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import numpy as np
from scipy.sparse import issparse
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_array, check_is_fitted

from daal4py.sklearn._n_jobs_support import control_n_jobs
from daal4py.sklearn._utils import sklearn_check_version
Expand All @@ -44,6 +44,11 @@

from onedal.decomposition import PCA as onedal_PCA

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = sklearn_PCA._validate_data

@control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
class PCA(sklearn_PCA):
__doc__ = sklearn_PCA.__doc__
Expand Down Expand Up @@ -133,7 +138,8 @@ def _fit(self, X):
)

def _onedal_fit(self, X, queue=None):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
ensure_2d=True,
Expand Down Expand Up @@ -177,12 +183,17 @@ def transform(self, X):
def _onedal_transform(self, X, queue=None):
check_is_fitted(self)
if sklearn_check_version("1.0"):
self._check_feature_names(X, reset=False)
X = self._validate_data(
X,
dtype=[np.float64, np.float32],
reset=False,
)
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
reset=False,
)
else:
X = check_array(
X,
dtype=[np.float64, np.float32],
)
self._validate_n_features_in_after_fitting(X)

return self._onedal_estimator.predict(X, queue=queue)
Expand Down
48 changes: 28 additions & 20 deletions sklearnex/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import numpy as np
from scipy import sparse as sp
from sklearn.base import clone
from sklearn.base import BaseEstimator, clone
from sklearn.ensemble import ExtraTreesClassifier as sklearn_ExtraTreesClassifier
from sklearn.ensemble import ExtraTreesRegressor as sklearn_ExtraTreesRegressor
from sklearn.ensemble import RandomForestClassifier as sklearn_RandomForestClassifier
Expand Down Expand Up @@ -67,12 +67,18 @@
if sklearn_check_version("1.4"):
from daal4py.sklearn.utils import _assert_all_finite

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data


class BaseForest(ABC):
_onedal_factory = None

def _onedal_fit(self, X, y, sample_weight=None, queue=None):
X, y = self._validate_data(
X, y = validate_data(
self,
X,
y,
multi_output=True,
Expand Down Expand Up @@ -607,21 +613,7 @@ def predict_proba(self, X):
# TODO:
# _check_proba()
# self._check_proba()
if sklearn_check_version("1.0"):
self._check_feature_names(X, reset=False)
if hasattr(self, "n_features_in_"):
try:
num_features = _num_features(X)
except TypeError:
num_features = _num_samples(X)
if num_features != self.n_features_in_:
raise ValueError(
(
f"X has {num_features} features, "
f"but {self.__class__.__name__} is expecting "
f"{self.n_features_in_} features as input"
)
)

return dispatch(
self,
"predict_proba",
Expand Down Expand Up @@ -797,7 +789,8 @@ def _onedal_predict(self, X, queue=None):
check_is_fitted(self, "_onedal_estimator")

if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
force_all_finite=False,
Expand All @@ -810,6 +803,19 @@ def _onedal_predict(self, X, queue=None):
dtype=[np.float64, np.float32],
force_all_finite=False,
) # Warning, order of dtype matters
if hasattr(self, "n_features_in_"):
try:
num_features = _num_features(X)
except TypeError:
num_features = _num_samples(X)
if num_features != self.n_features_in_:
raise ValueError(
(
f"X has {num_features} features, "
f"but {self.__class__.__name__} is expecting "
f"{self.n_features_in_} features as input"
)
)
self._check_n_features(X, reset=False)

res = self._onedal_estimator.predict(X, queue=queue)
Expand All @@ -819,7 +825,8 @@ def _onedal_predict_proba(self, X, queue=None):
check_is_fitted(self, "_onedal_estimator")

if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
force_all_finite=False,
Expand Down Expand Up @@ -1123,7 +1130,8 @@ def _onedal_predict(self, X, queue=None):
check_is_fitted(self, "_onedal_estimator")

if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
force_all_finite=False,
Expand Down
14 changes: 11 additions & 3 deletions sklearnex/linear_model/incremental_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
if sklearn_check_version("1.2"):
from sklearn.utils._param_validation import Interval

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = BaseEstimator._validate_data

from onedal.common.hyperparameters import get_hyperparameters

from .._device_offload import dispatch, wrap_output_data
Expand Down Expand Up @@ -131,7 +136,8 @@ def _onedal_predict(self, X, queue=None):
self._validate_params()

if sklearn_check_version("1.0"):
X = self._validate_data(
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
copy=self.copy_X,
Expand Down Expand Up @@ -162,7 +168,8 @@ def _onedal_partial_fit(self, X, y, check_input=True, queue=None):

if check_input:
if sklearn_check_version("1.0"):
X, y = self._validate_data(
X, y = validate_data(
self,
X,
y,
dtype=[np.float64, np.float32],
Expand Down Expand Up @@ -212,7 +219,8 @@ def _onedal_fit(self, X, y, queue=None):
self._validate_params()

if sklearn_check_version("1.0"):
X, y = self._validate_data(
X, y = validate_data(
self,
X,
y,
dtype=[np.float64, np.float32],
Expand Down
Loading

0 comments on commit 9f5390c

Please sign in to comment.