diff --git a/atom/__init__.py b/atom/__init__.py
index f01e770e9..21a19239e 100644
--- a/atom/__init__.py
+++ b/atom/__init__.py
@@ -12,4 +12,4 @@ from atom.utils.constants import __version__

-sklearn.set_config(transform_output="pandas")
+sklearn.set_config(transform_output="pandas", enable_metadata_routing=True)
diff --git a/atom/atom.py b/atom/atom.py
index 7b4064a76..e6e2fdc89 100644
--- a/atom/atom.py
+++ b/atom/atom.py
@@ -1349,7 +1349,7 @@ def add(
        """Add a transformer to the pipeline.

        If the transformer is not fitted, it is fitted on the complete
-        training set. Afterwards, the data set is transformed and the
+        training set. Afterward, the data set is transformed and the
        estimator is added to atom's pipeline. If the estimator is a
        sklearn Pipeline, every estimator is merged independently with
        atom.
@@ -1639,7 +1639,7 @@ def encode(
        max_onehot: IntLargerTwo | None = 10,
        ordinal: dict[str, Sequence[Any]] | None = None,
        infrequent_to_value: FloatLargerZero | None = None,
-        value: str = "rare",
+        value: str = "infrequent",
        **kwargs,
    ):
        """Perform encoding of categorical features.
diff --git a/atom/basemodel.py b/atom/basemodel.py
index 43db3e4da..82126f9be 100644
--- a/atom/basemodel.py
+++ b/atom/basemodel.py
@@ -50,6 +50,7 @@
 )
 from sklearn.utils import resample
 from sklearn.utils.metaestimators import available_if
+from sklearn.utils.validation import _check_response_method
 from sktime.forecasting.base import ForecastingHorizon
 from sktime.forecasting.compose import make_reduction
 from sktime.forecasting.model_evaluation import evaluate
@@ -643,7 +644,7 @@ def _get_pred(
        self,
        rows: RowSelector,
        target: TargetSelector | None = None,
-        attr: PredictionMethods | Literal["thresh"] = "predict",
+        method: PredictionMethods | Sequence[PredictionMethods] = "predict",
    ) -> tuple[Pandas, Pandas]:
        """Get the true and predicted values for a column.

@@ -661,9 +662,10 @@
            Target column to look at. Only for [multioutput tasks][].
            If None, all columns are returned.

-        attr: str, default="predict"
-            Method used to get predictions. Use "thresh" to get
-            `decision_function` or `predict_proba` in that order.
+        method: str or sequence, default="predict"
+            Response method(s) used to get predictions. If a
+            sequence is passed, the methods are tried in that
+            order.

        Returns
        -------
@@ -674,12 +676,7 @@
            Predicted values.

        """
-        # Select method to use for predictions
-        if attr == "thresh":
-            for attribute in PredictionMethods.__args__:
-                if hasattr(self.estimator, attribute):
-                    attr = attribute
-                    break
+        method_caller = _check_response_method(self.estimator, method).__name__

        X, y = self.branch._get_rows(rows, return_X_y=True)

@@ -695,11 +692,16 @@
                self.estimator.get_tags().get("capability:insample")
                and (not self.estimator.get_tags()["requires-fh-in-fit"] or rows == "test")
            ):
-                y_pred = self._prediction(fh=X.index, X=check_empty(X), verbose=0, method=attr)
+                y_pred = self._prediction(
+                    fh=X.index,
+                    X=check_empty(X),
+                    verbose=0,
+                    method=method_caller,
+                )
            else:
                y_pred = bk.Series([np.NaN] * len(X), index=X.index)
        else:
-            y_pred = self._prediction(X.index, verbose=0, method=attr)
+            y_pred = self._prediction(X.index, verbose=0, method=method_caller)

        if self.task.is_multioutput:
            if target is not None:
@@ -843,21 +845,21 @@
            Metric score on the selected data set.
""" - if scorer.__class__.__name__ == "_ThresholdScorer": - y_true, y_pred = self._get_pred(rows, attr="thresh") - elif scorer.__class__.__name__ == "_ProbaScorer": - y_true, y_pred = self._get_pred(rows, attr="predict_proba") - else: - if threshold and self.task.is_binary and hasattr(self, "predict_proba"): - y_true, y_pred = self._get_pred(rows, attr="predict_proba") - if isinstance(y_pred, dataframe_t): - # Update every target column with its corresponding threshold - for i, value in enumerate(threshold): - y_pred.iloc[:, i] = (y_pred.iloc[:, i] > value).astype("int") - else: - y_pred = (y_pred > threshold[0]).astype("int") + if ( + scorer._response_method == "predict" + and threshold + and self.task.is_binary + and hasattr(self.estimator, "predict_proba") + ): + y_true, y_pred = self._get_pred(rows, method="predict_proba") + if isinstance(y_pred, dataframe_t): + # Update every target column with its corresponding threshold + for i, value in enumerate(threshold): + y_pred.iloc[:, i] = (y_pred.iloc[:, i] > value).astype("int") else: - y_true, y_pred = self._get_pred(rows, attr="predict") + y_pred = (y_pred > threshold[0]).astype("int") + else: + y_true, y_pred = self._get_pred(rows, method=scorer._response_method) kwargs = {} if "sample_weight" in sign(scorer._score_func): @@ -2917,7 +2919,7 @@ def score( metric for [multi-metric runs][]). sample_weight: sequence or None, default=None - Sample weights corresponding to y. + Sample weights corresponding to `y`. verbose: int or None, default=None Verbosity level for the transformers in the pipeline. If diff --git a/atom/pipeline.py b/atom/pipeline.py index d398e99c2..ad88b5bf0 100644 --- a/atom/pipeline.py +++ b/atom/pipeline.py @@ -90,7 +90,9 @@ class Pipeline(SkPipeline): verbose: int or None, default=0 Verbosity level of the transformers in the pipeline. If None, it leaves them to their original verbosity. If >0, the time - elapsed while fitting each step is printed. + elapsed while fitting each step is printed. Note this is not + the same as sklearn's `verbose` parameter. Use the pipeline's + verbose attribute to modify that one (defaults to False). Attributes ---------- @@ -128,9 +130,9 @@ class Pipeline(SkPipeline): # Train models atom.run(models="LR") - # Get the pipeline and make predictions - pl = atom.lr.export_pipeline() - print(pl.predict(X)) + # Get the pipeline object + pipeline = atom.lr.export_pipeline() + print(pipeline) `` """ @@ -224,7 +226,7 @@ def _iter( *, with_final: Bool = True, filter_passthrough: Bool = True, - filter_train_only: Bool = True, + filter_train_only: Bool = False, ) -> Iterator[tuple[int, str, Estimator]]: """Generate (idx, name, estimator) tuples from self.steps. @@ -239,7 +241,7 @@ def _iter( filter_passthrough: bool, default=True Whether to exclude `passthrough` elements. - filter_passthrough: bool, default=True + filter_train_only: bool, default=False Whether to exclude estimators that should only be used for training (have the `_train_only` attribute). @@ -293,9 +295,7 @@ def _fit( self.steps: list[tuple[str, Estimator]] = list(self.steps) self._validate_steps() - for step, name, transformer in self._iter( - with_final=False, filter_passthrough=False, filter_train_only=False - ): + for step, name, transformer in self._iter(with_final=False, filter_passthrough=False): if transformer is None or transformer == "passthrough": with _print_elapsed_time("Pipeline", self._log_message(step)): continue @@ -351,17 +351,9 @@ def fit( Target column corresponding to `X`. 
**params - - If `enable_metadata_routing=False` (default): - - Parameters passed to the `fit` method of each step, - where each parameter name is prefixed such that - parameter `p` for step `s` has key `s__p`. - - - If `enable_metadata_routing=True`: - - Parameters requested and accepted by steps. Each step - must have requested certain metadata for these parameters - to be forwarded to them. + Parameters requested and accepted by steps. Each step must + have requested certain metadata for these parameters to be + forwarded to them. Returns ------- @@ -370,11 +362,12 @@ def fit( """ routed_params = self._check_method_params(method="fit", props=params) + X, y = self._fit(X, y, routed_params) with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)): if self._final_estimator is not None and self._final_estimator != "passthrough": with adjust_verbosity(self._final_estimator, self._verbose): - self._final_estimator.fit(X, y, **routed_params[self.steps[-1][0]]["fit"]) + self._final_estimator.fit(X, y, **routed_params[self.steps[-1][0]].fit) return self @@ -405,17 +398,9 @@ def fit_transform( Target column corresponding to `X`. **params - - If `enable_metadata_routing=False` (default): - - Parameters passed to the `fit` method of each step, - where each parameter name is prefixed such that - parameter `p` for step `s` has key `s__p`. - - - If `enable_metadata_routing=True`: - - Parameters requested and accepted by steps. Each step - must have requested certain metadata for these parameters - to be forwarded to them. + Parameters requested and accepted by steps. Each step must + have requested certain metadata for these parameters to be + forwarded to them. Returns ------- @@ -449,6 +434,8 @@ def transform( self, X: XConstructor | None = None, y: YConstructor | None = None, + *, + filter_train_only: Bool = True, **params, ) -> Pandas | tuple[DataFrame, Pandas]: """Transform the data. @@ -469,6 +456,10 @@ def transform( y: dict, sequence, dataframe or None, default=None Target column corresponding to `X`. + filter_train_only: bool, default=True + Whether to exclude transformers that should only be used + on the training set. + **params Parameters requested and accepted by steps. Each step must have requested certain metadata for these parameters to be @@ -489,7 +480,7 @@ def transform( _raise_for_params(params, self, "transform") routed_params = process_routing(self, "transform", **params) - for _, name, transformer in self._iter(): + for _, name, transformer in self._iter(filter_train_only=filter_train_only): with adjust_verbosity(transformer, self._verbose): X, y = self._mem_transform( transformer=transformer, @@ -505,6 +496,8 @@ def inverse_transform( self, X: XConstructor | None = None, y: YConstructor | None = None, + *, + filter_train_only: Bool = True, **params, ) -> Pandas | tuple[DataFrame, Pandas]: """Inverse transform for each step in a reverse order. @@ -521,6 +514,10 @@ def inverse_transform( y: dict, sequence, dataframe or None, default=None Target column corresponding to `X`. + filter_train_only: bool, default=True + Whether to exclude transformers that should only be used + on the training set. + **params Parameters requested and accepted by steps. 
Each step must have requested certain metadata for these parameters to be @@ -541,7 +538,8 @@ def inverse_transform( _raise_for_params(params, self, "inverse_transform") routed_params = process_routing(self, "inverse_transform", **params) - for _, name, transformer in reversed(list(self._iter())): + reverse_iter = reversed(list(self._iter(filter_train_only=filter_train_only))) + for _, name, transformer in reverse_iter: with adjust_verbosity(transformer, self._verbose): X, y = self._mem_transform( transformer=transformer, @@ -554,7 +552,7 @@ def inverse_transform( return variable_return(X, y) @available_if(_final_estimator_has("decision_function")) - def decision_function(self, X: XConstructor) -> np.ndarray: + def decision_function(self, X: XConstructor, **params) -> np.ndarray: """Transform, then decision_function of the final estimator. Parameters @@ -562,6 +560,11 @@ def decision_function(self, X: XConstructor) -> np.ndarray: X: dataframe-like Feature set with shape=(n_samples, n_features). + **params + Parameters requested and accepted by steps. Each step must + have requested certain metadata for these parameters to be + forwarded to them. + Returns ------- np.ndarray @@ -571,18 +574,28 @@ def decision_function(self, X: XConstructor) -> np.ndarray: multiclass classification tasks. """ - for _, _, transformer in self._iter(with_final=False): + _raise_for_params(params, self, "decision_function") + + routed_params = process_routing(self, "decision_function", **params) + + for _, name, transformer in self._iter(with_final=False, filter_train_only=True): with adjust_verbosity(transformer, self._verbose): - X, _ = self._mem_transform(transformer, X) + X, _ = self._mem_transform( + transformer=transformer, + X=X, + **routed_params.get(name, {}).get("transform", {}), + ) - return self.steps[-1][1].decision_function(X) + return self.steps[-1][1].decision_function( + X, **routed_params.get(self.steps[-1][0], {}).get("decision_function", {}) + ) @available_if(_final_estimator_has("predict")) def predict( self, X: XConstructor | None = None, fh: FHConstructor | None = None, - **predict_params, + **params, ) -> np.ndarray | Pandas: """Transform, then predict of the final estimator. @@ -596,12 +609,14 @@ def predict( The forecasting horizon encoding the time stamps to forecast at. Only for [forecast][time-series] tasks. - **predict_params - Additional keyword arguments for the predict method. Note - that while this may be used to return uncertainties from - some models with return_std or return_cov, uncertainties - that are generated by the transformations in the pipeline - are not propagated to the final estimator. + **params + Parameters requested and accepted by steps. Each step must + have requested certain metadata for these parameters to be + forwarded to them. Note that while this may be used to + return uncertainties from some models with `return_std` or + `return_cov`, uncertainties that are generated by the + transformations in the pipeline are not propagated to the + final estimator. 
Returns ------- @@ -613,17 +628,19 @@ def predict( if X is None and fh is None: raise ValueError("X and fh cannot be both None.") - for _, _, transformer in self._iter(with_final=False): + routed_params = process_routing(self, "predict", **params) + + for _, name, transformer in self._iter(with_final=False, filter_train_only=True): with adjust_verbosity(transformer, self._verbose): - X, _ = self._mem_transform(transformer, X) + X, _ = self._mem_transform(transformer, X, **routed_params[name].transform) if "fh" in sign(self.steps[-1][1].predict): if fh is None: raise ValueError("The fh parameter cannot be None for forecasting estimators.") - return self.steps[-1][1].predict(fh=fh, X=X, **predict_params) + return self.steps[-1][1].predict(fh=fh, X=X) else: - return self.steps[-1][1].predict(X, **predict_params) + return self.steps[-1][1].predict(X, **routed_params[self.steps[-1][0]].predict) @available_if(_final_estimator_has("predict_interval")) def predict_interval( @@ -653,14 +670,14 @@ def predict_interval( Computed interval forecasts. """ - for _, _, transformer in self._iter(with_final=False): + for _, _, transformer in self._iter(with_final=False, filter_train_only=True): with adjust_verbosity(transformer, self._verbose): X, y = self._mem_transform(transformer, X) return self.steps[-1][1].predict_interval(fh=fh, X=X, coverage=coverage) @available_if(_final_estimator_has("predict_log_proba")) - def predict_log_proba(self, X: XConstructor) -> np.ndarray: + def predict_log_proba(self, X: XConstructor, **params) -> np.ndarray: """Transform, then predict_log_proba of the final estimator. Parameters @@ -668,6 +685,11 @@ def predict_log_proba(self, X: XConstructor) -> np.ndarray: X: dataframe-like Feature set with shape=(n_samples, n_features). + **params + Parameters requested and accepted by steps. Each step must + have requested certain metadata for these parameters to be + forwarded to them. + Returns ------- list or np.ndarray @@ -675,11 +697,15 @@ def predict_log_proba(self, X: XConstructor) -> np.ndarray: n_classes) or a list of arrays for [multioutput tasks][]. """ - for _, _, transformer in self._iter(with_final=False): + routed_params = process_routing(self, "predict_log_proba", **params) + + for _, name, transformer in self._iter(with_final=False, filter_train_only=True): with adjust_verbosity(transformer, self._verbose): - X, _ = self._mem_transform(transformer, X) + X, _ = self._mem_transform(transformer, X, **routed_params[name].transform) - return self.steps[-1][1].predict_log_proba(X) + return self.steps[-1][1].predict_log_proba( + X, **routed_params[self.steps[-1][0]].predict_log_proba + ) @available_if(_final_estimator_has("predict_proba")) def predict_proba( @@ -688,6 +714,7 @@ def predict_proba( fh: FHConstructor | None = None, *, marginal: Bool = True, + **params, ) -> list[np.ndarray] | np.ndarray | Normal: """Transform, then predict_proba of the final estimator. @@ -705,6 +732,11 @@ def predict_proba( Whether returned distribution is marginal by time index. Only for [forecast][time-series] tasks. + **params + Parameters requested and accepted by steps. Each step must + have requested certain metadata for these parameters to be + forwarded to them. 
+
        Returns
        -------
        list, np.ndarray or sktime.proba.[Normal][]
@@ -719,9 +751,11 @@
        if X is None and fh is None:
            raise ValueError("X and fh cannot be both None.")

-        for _, _, transformer in self._iter(with_final=False):
+        routed_params = process_routing(self, "predict_proba", **params)
+
+        for _, name, transformer in self._iter(with_final=False, filter_train_only=True):
            with adjust_verbosity(transformer, self._verbose):
-                X, _ = self._mem_transform(transformer, X)
+                X, _ = self._mem_transform(transformer, X, **routed_params[name].transform)

        if "fh" in sign(self.steps[-1][1].predict_proba):
            if fh is None:
@@ -729,7 +763,9 @@
            return self.steps[-1][1].predict_proba(fh=fh, X=X, marginal=marginal)
        else:
-            return self.steps[-1][1].predict_proba(X)
+            return self.steps[-1][1].predict_proba(
+                X, **routed_params[self.steps[-1][0]].predict_proba
+            )

    @available_if(_final_estimator_has("predict_quantiles"))
    def predict_quantiles(
@@ -760,7 +796,7 @@
            Computed quantile forecasts.

        """
-        for _, _, transformer in self._iter(with_final=False):
+        for _, _, transformer in self._iter(with_final=False, filter_train_only=True):
            with adjust_verbosity(transformer, self._verbose):
                X, y = self._mem_transform(transformer, X)

@@ -789,7 +825,7 @@ def predict_residuals(
            n_targets) for [multivariate][] tasks.

        """
-        for _, _, transformer in self._iter(with_final=False):
+        for _, _, transformer in self._iter(with_final=False, filter_train_only=True):
            with adjust_verbosity(transformer, self._verbose):
                X, y = self._mem_transform(transformer, X, y)

@@ -824,7 +860,7 @@ def predict_var(
            Computed variance forecasts.

        """
-        for _, _, transformer in self._iter(with_final=False):
+        for _, _, transformer in self._iter(with_final=False, filter_train_only=True):
            with adjust_verbosity(transformer, self._verbose):
                X, _ = self._mem_transform(transformer, X)

@@ -838,6 +874,7 @@ def score(
        fh: FHConstructor | None = None,
        *,
        sample_weight: Sequence[Scalar] | None = None,
+        **params,
    ) -> Float:
        """Transform, then score of the final estimator.

@@ -854,23 +891,27 @@
            The forecasting horizon encoding the time stamps to score.

        sample_weight: sequence or None, default=None
-            Sample weights corresponding to y.
+            Sample weights corresponding to `y` passed to the `score`
+            method of the final estimator. If None, no sample weights
+            are applied.

        Returns
        -------
        float
            Mean accuracy, r2 or mape of self.predict(X) with respect
-            to y.
+            to `y` (depending on task).
""" if X is None and y is None: raise ValueError("X and y cannot be both None.") - for _, _, transformer in self._iter(with_final=False): + routed_params = process_routing(self, "score", sample_weight=sample_weight, **params) + + for _, name, transformer in self._iter(with_final=False, filter_train_only=True): with adjust_verbosity(transformer, self._verbose): - X, y = self._mem_transform(transformer, X, y) + X, y = self._mem_transform(transformer, X, y, **routed_params[name].transform) if "fh" in sign(self.steps[-1][1].score): return self.steps[-1][1].score(y=y, X=X, fh=fh) else: - return self.steps[-1][1].score(X, y, sample_weight=sample_weight) + return self.steps[-1][1].score(X, y, **routed_params[self.steps[-1][0]].score) diff --git a/atom/plots/predictionplot.py b/atom/plots/predictionplot.py index 8c325d4e7..f32954740 100644 --- a/atom/plots/predictionplot.py +++ b/atom/plots/predictionplot.py @@ -342,7 +342,7 @@ def plot_calibration( for m in models_c: for child, ds in self._get_set(rows): - y_true, y_pred = m._get_pred(ds, target, attr="predict_proba") + y_true, y_pred = m._get_pred(ds, target, method="predict_proba") # Get calibration (frac of positives and predicted values) frac_pos, pred = calibration_curve(y_true, y_pred, n_bins=n_bins) @@ -533,7 +533,7 @@ def plot_confusion_matrix( xaxis, yaxis = BasePlot._fig.get_axes() for m in models_c: - y_true, y_pred = m._get_pred(rows, target_c, attr="predict") + y_true, y_pred = m._get_pred(rows, target_c, method="predict") if threshold != 0.5: y_pred = (y_pred > threshold).astype(int) @@ -705,7 +705,9 @@ def plot_det( for m in models_c: for child, ds in self._get_set(rows): # Get fpr-fnr pairs for different thresholds - fpr, fnr, _ = det_curve(*m._get_pred(ds, target, attr="thresh")) + fpr, fnr, _ = det_curve( + *m._get_pred(ds, target, method=("decision_function", "predict_proba")) + ) fig.add_trace( self._draw_line( @@ -1472,7 +1474,9 @@ def plot_gains( for m in models_c: for child, ds in self._get_set(rows): - y_true, y_pred = m._get_pred(ds, target, attr="thresh") + y_true, y_pred = m._get_pred( + ds, target, method=("decision_function", "predict_proba") + ) fig.add_trace( self._draw_line( @@ -1751,7 +1755,9 @@ def plot_lift( for m in models_c: for child, ds in self._get_set(rows): - y_true, y_pred = m._get_pred(ds, target, attr="thresh") + y_true, y_pred = m._get_pred( + ds, target, method=("decision_function", "predict_proba") + ) gains = np.cumsum(y_true.iloc[np.argsort(y_pred)[::-1]]) / y_true.sum() fig.add_trace( @@ -2817,7 +2823,9 @@ def plot_prc( for m in models_c: for child, ds in self._get_set(rows): - y_true, y_pred = m._get_pred(ds, target, attr="thresh") + y_true, y_pred = m._get_pred( + ds, target, method=("decision_function", "predict_proba") + ) # Get precision-recall pairs for different thresholds prec, rec, _ = precision_recall_curve(y_true, y_pred) @@ -3429,7 +3437,9 @@ def plot_roc( for m in models_c: for child, ds in self._get_set(rows): # Get False (True) Positive Rate as arrays - fpr, tpr, _ = roc_curve(*m._get_pred(ds, target, attr="thresh")) + fpr, tpr, _ = roc_curve( + *m._get_pred(ds, target, method=("decision_function", "predict_proba")) + ) fig.add_trace( self._draw_line( @@ -3731,7 +3741,7 @@ def plot_threshold( xaxis, yaxis = BasePlot._fig.get_axes() for m in models_c: - y_true, y_pred = m._get_pred(rows, target, attr="predict_proba") + y_true, y_pred = m._get_pred(rows, target, method="predict_proba") for met in metric_c: fig.add_trace( self._draw_line( diff --git a/atom/utils/types.py 
b/atom/utils/types.py index d6c6227cb..d1075aa27 100644 --- a/atom/utils/types.py +++ b/atom/utils/types.py @@ -120,9 +120,10 @@ class Estimator(Protocol): """Protocol for sklearn-like estimators.""" def __init__(self, *args, **kwargs): ... + + def fit(self, *args, **kwargs): ... def get_params(self, *args, **kwargs): ... def set_params(self, *args, **kwargs): ... - def fit(self, *args, **kwargs): ... @runtime_checkable diff --git a/atom/utils/utils.py b/atom/utils/utils.py index cf59d6d9b..09a805da7 100644 --- a/atom/utils/utils.py +++ b/atom/utils/utils.py @@ -413,12 +413,12 @@ def evaluate( # Convert CatBoost predictions to probabilities e = np.exp(approxes[0]) y_pred = e / (1 + e) - if self.scorer.__class__.__name__ == "_PredictScorer": + if self.scorer._response_method == "predict": y_pred = (y_pred > 0.5).astype(int) elif self.task.is_multiclass: y_pred = np.array(approxes).T - if self.scorer.__class__.__name__ == "_PredictScorer": + if self.scorer._response_method == "predict": y_pred = np.argmax(y_pred, axis=1) else: @@ -481,7 +481,7 @@ def __call__( Whether higher is better. """ - if self.scorer.__class__.__name__ == "_PredictScorer": + if self.scorer._response_method == "predict": if self.task.is_binary: y_pred = (y_pred > 0.5).astype(int) elif self.task.is_multiclass: @@ -536,7 +536,7 @@ def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> Float: Metric score. """ - if self.scorer.__class__.__name__ == "_PredictScorer": + if self.scorer._response_method == "predict": if self.task.is_binary: y_pred = (y_pred > 0.5).astype(int) elif self.task.is_multiclass: @@ -2634,7 +2634,8 @@ def fit_transform_one( - If dataframe: Target columns for multioutput tasks. routed_params: Bunch - Routed parmeters for the `fit` and `transform` methods. + Routed parameters for the `fit` method. Note that parameters + are never routed to the `transform` method. message: str or None, default=None Short message. If None, nothing will be printed. @@ -2651,8 +2652,8 @@ def fit_transform_one( Fitted transformer. """ - fit_one(transformer, X, y, message, **routed_params.fit) - X, y = transform_one(transformer, X, y, **routed_params.transform) + fit_one(transformer, X, y, message, **routed_params.fit_transform) + X, y = transform_one(transformer, X, y) return X, y, transformer @@ -2814,16 +2815,22 @@ def wrapper( self._check_feature_names(Xt, reset=True) self._check_n_features(Xt, reset=True) return f(self, Xt, yt, **kwargs) + else: if "TransformerMixin" not in str(self.fit): check_is_fitted(self) + Xt, yt = self._check_input( X=X, y=y, columns=getattr(self, "feature_names_in_", None), name=getattr(self, "target_names_in_", None), ) - return f(self, Xt, yt, **kwargs) + + if "y" in sign(f): + return f(self, Xt, yt, **kwargs) + else: + return f(self, Xt, **kwargs) return wrapper diff --git a/docs_sources/dependencies.md b/docs_sources/dependencies.md index 90a83b9c8..513dabbef 100644 --- a/docs_sources/dependencies.md +++ b/docs_sources/dependencies.md @@ -44,7 +44,7 @@ packages are necessary for its correct functioning. 
* **[plotly](https://plotly.com/python/)** (>=5.15.0) * **[ray[serve]](https://docs.ray.io/en/latest/)** (>=2.7.1) * **[requests](https://requests.readthedocs.io/en/latest/)** (>=2.31.0) -* **[scikit-learn](https://scikit-learn.org/stable/)** (>=1.3.1) +* **[scikit-learn](https://scikit-learn.org/stable/)** (>=1.4.0) * **[scikit-learn-intelex](https://github.com/intel/scikit-learn-intelex)** (>=2023.2.1) * **[scipy](https://www.scipy.org/)** (>=1.10.1) * **[shap](https://github.com/slundberg/shap/)** (>=0.43.0) diff --git a/pyproject.toml b/pyproject.toml index 1302d85c8..d79784634 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "plotly>=5.15.0", "ray[serve]>=2.7.1", "requests>=2.31.0", - "scikit-learn>=1.3.1", + "scikit-learn>=1.4.0", "scikit-learn-intelex>=2023.2.1; platform_machine == 'x86_64' or platform_machine == 'AMD64'", "scipy>=1.10.1", "shap>=0.43.0", diff --git a/tests/conftest.py b/tests/conftest.py index 9679fe955..9721308ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ from sktime.datasets import load_airline, load_longley from sktime.split import temporal_train_test_split +from atom.data_cleaning import TransformerMixin from atom.utils.utils import merge, n_cols, to_df, to_pandas @@ -33,7 +34,7 @@ from atom.utils.types import DataFrame, Pandas, Sequence, XSelector -class DummyTransformer(BaseEstimator): +class DummyTransformer(TransformerMixin, BaseEstimator): """Transformer class for testing name keeping of arrays. Parameters @@ -49,6 +50,7 @@ class DummyTransformer(BaseEstimator): """ def __init__(self, strategy: str): + super().__init__(logger=None, verbose=0) self.strategy = strategy def transform(self, X: DataFrame) -> np.ndarray:
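The migration above leans on two sklearn >=1.4 pieces: the global `enable_metadata_routing` switch now set in `atom/__init__.py`, and `_check_response_method`, which replaces the old `attr="thresh"` loop in `_get_pred`. A minimal sketch of both, assuming sklearn 1.4 (`_check_response_method` is a private helper, and the toy estimator and data are illustrative, not part of the patch):

```python
# Sketch of the sklearn >=1.4 helpers this patch builds on; the
# LogisticRegression and toy data are illustrative only.
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.utils.validation import _check_response_method

# Mirrors the new call in atom/__init__.py.
sklearn.set_config(transform_output="pandas", enable_metadata_routing=True)

est = LogisticRegression().fit([[0.0], [1.0], [2.0], [3.0]], [0, 0, 1, 1])

# A sequence of response methods is tried in order; the first one the
# estimator implements is returned. This replaces the old attr="thresh"
# convention used by the plotting methods.
method = _check_response_method(est, ("decision_function", "predict_proba"))
print(method.__name__)  # "decision_function"
```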
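The removed `scorer.__class__.__name__` comparisons (`_ThresholdScorer`, `_ProbaScorer`, `_PredictScorer`) are replaced by the `_response_method` attribute that sklearn >=1.4 scorers carry. It is private API, so the values below are an assumption pinned to the 1.4 release:

```python
# What scorer._response_method exposes on sklearn 1.4; this is what the
# patched _get_score and the custom scorer wrappers in atom/utils/utils.py
# inspect instead of the scorer's class name.
from sklearn.metrics import get_scorer

print(get_scorer("accuracy")._response_method)      # "predict"
print(get_scorer("neg_log_loss")._response_method)  # "predict_proba"
print(get_scorer("roc_auc")._response_method)       # ("decision_function", "predict_proba")
```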
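With routing enabled globally, the `**params` accepted by the pipeline's `fit`, `transform`, and prediction methods only reach steps that explicitly requested them, which is why the dual `enable_metadata_routing=False/True` docstrings could be collapsed into one. A self-contained sketch with plain sklearn objects (atom's `Pipeline` subclass follows the same contract; the data is made up):

```python
# Metadata routing in a nutshell, assuming enable_metadata_routing=True
# as now set in atom/__init__.py. Plain sklearn objects, toy data.
import numpy as np
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

sklearn.set_config(enable_metadata_routing=True)

pipe = Pipeline(
    steps=[
        # Each consumer must state whether it wants the metadata; leaving
        # the request unset while passing sample_weight raises an
        # UnsetMetadataPassedError instead of silently dropping it.
        ("scaler", StandardScaler().set_fit_request(sample_weight=False)),
        ("clf", LogisticRegression().set_fit_request(sample_weight=True)),
    ]
)

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

# Routed by name -- no "clf__sample_weight" prefixing as in the old API.
pipe.fit(X, y, sample_weight=[1.0, 1.0, 2.0, 2.0])
```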
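Finally, the reworked threshold branch in `_get_score` binarizes `predict_proba` output per target column before handing it to a `predict` scorer. A toy equivalent of that loop (column names and cutoffs invented for illustration):

```python
# Toy equivalent of the per-target threshold loop in _get_score.
import pandas as pd

y_pred = pd.DataFrame({"t1": [0.2, 0.7, 0.9], "t2": [0.6, 0.4, 0.8]})
threshold = [0.5, 0.75]

# Update every target column with its corresponding threshold.
for i, value in enumerate(threshold):
    y_pred.iloc[:, i] = (y_pred.iloc[:, i] > value).astype("int")

print(y_pred)  # t1 -> 0, 1, 1 and t2 -> 0, 0, 1 (dtype may stay float)
```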