diff --git a/atom/atom.py b/atom/atom.py index 6da9177e4..091cc9eec 100644 --- a/atom/atom.py +++ b/atom/atom.py @@ -274,8 +274,7 @@ def n_outliers(self) -> int | None: """Number of samples in the training set containing outliers.""" if not is_sparse(self.X): data = self.train.select_dtypes(include=["number"]) - z_scores = (np.abs(stats.zscore(data.values.astype(float))) > 3) - return (z_scores.abs() > 3).any(axis=1).sum() + return (np.abs(stats.zscore(data.values.astype(float))) > 3).any(axis=1).sum() @property def classes(self) -> pd.DataFrame | None: diff --git a/atom/basetransformer.py b/atom/basetransformer.py index 237e7f3a0..14c78e3c3 100644 --- a/atom/basetransformer.py +++ b/atom/basetransformer.py @@ -756,8 +756,6 @@ def _no_data_sets( else: test_size = self.test_size - splitter = self._get_est_class("train_test_split", "model_selection") - try: # Define holdout set size if self.holdout_size: @@ -773,7 +771,7 @@ def _no_data_sets( f"got {self.holdout_size}." ) - data, holdout = splitter( + data, holdout = train_test_split( data, test_size=holdout_size, random_state=self.random_state, @@ -784,7 +782,7 @@ def _no_data_sets( else: holdout = None - train, test = splitter( + train, test = train_test_split( data, test_size=test_size, random_state=self.random_state,