Commit 77561a3

fix tests all plots
tvdboom committed Oct 13, 2023
1 parent 2023ab2 commit 77561a3
Showing 20 changed files with 768 additions and 897 deletions.
2 changes: 1 addition & 1 deletion atom/atom.py
@@ -503,7 +503,7 @@ def automl(self, **kwargs):

                 self._models.append(model)
                 self._log(
-                    f" --> Adding model {model._fullname} "
+                    f" --> Adding model {model.fullname} "
                     f"({model.name}) to the pipeline...", 2
                 )
                 break  # Avoid non-linear pipelines
48 changes: 24 additions & 24 deletions atom/basemodel.py
@@ -284,7 +284,7 @@ def __getitem__(self, item: Int | str | list) -> Pandas:
         return self.dataset[item]  # Get a subset of the dataset

     @property
-    def _fullname(self) -> str:
+    def fullname(self) -> str:
         """Return the model's class name."""
         return self.__class__.__name__
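
The recurring change in this commit is the promotion of the private `_fullname` property to a public `fullname`; every call site below is updated accordingly. A minimal sketch of the pattern (the classes here are hypothetical stand-ins, not ATOM's real ones):

```python
# Hypothetical stand-ins illustrating the renamed property.
class BaseModel:
    @property
    def fullname(self) -> str:
        """Return the model's class name."""
        return self.__class__.__name__


class RandomForest(BaseModel):
    pass


print(RandomForest().fullname)  # -> "RandomForest"
```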

@@ -1010,17 +1010,17 @@ def fit_model(
             estimator = results[0][0]
             score = list(np.mean(scores := [r[1] for r in results], axis=0))

-                if len(results) > 1:
-                    # Report cv scores for termination judgment
-                    report_cross_validation_scores(trial, scores)
+            if len(results) > 1:
+                # Report cv scores for termination judgment
+                report_cross_validation_scores(trial, scores)

             trial.set_user_attr("estimator", estimator)

             return score

         # Running hyperparameter tuning ============================ >>

-        self._log(f"Running hyperparameter tuning for {self._fullname}...", 1)
+        self._log(f"Running hyperparameter tuning for {self.fullname}...", 1)

         # Check the validity of the provided parameters
         self._check_est_params()
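
For context, `report_cross_validation_scores` comes from Optuna's terminator module and requires more than one fold score per trial, which fits the `len(results) > 1` guard above. A self-contained sketch of that API (assuming optuna>=3.2 and scikit-learn; this is not ATOM's actual tuning loop):

```python
import optuna
from optuna.terminator import TerminatorCallback, report_cross_validation_scores
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=200, random_state=0)


def objective(trial: optuna.Trial) -> float:
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 16), random_state=0
    )
    scores = cross_val_score(clf, X, y, cv=5)
    # The terminator estimates score noise from the per-fold scores,
    # so it needs at least two of them per trial.
    report_cross_validation_scores(trial, list(scores))
    return scores.mean()


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20, callbacks=[TerminatorCallback()])
```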
@@ -1041,7 +1041,7 @@ def fit_model(
                 raise ValueError(
                     "Invalid value for the distributions parameter. "
                     f"Parameter {n} is not a predefined hyperparameter "
-                    f"of the {self._fullname} model. See the model's "
+                    f"of the {self.fullname} model. See the model's "
                     "documentation for an overview of the available "
                     "hyperparameters and their distributions."
                 )
@@ -1091,7 +1091,7 @@ def fit_model(
         # Initialize live study plot
         if self._ht.get("plot", False) and n_jobs == 1:
             plot_callback = PlotCallback(
-                name=self._fullname,
+                name=self.fullname,
                 metric=self._metric.keys(),
                 aesthetics=self.aesthetics,
             )
@@ -1159,7 +1159,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
         self.clear()  # Reset model's state

         if self._study is None:
-            self._log(f"Results for {self._fullname}:", 1)
+            self._log(f"Results for {self.fullname}:", 1)
             self._log(f"Fit {'-' * 45}", 1)

         # Assign estimator if not done already
@@ -1193,7 +1193,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
             mlflow.set_tags(
                 {
                     "name": self.name,
-                    "model": self._fullname,
+                    "model": self.fullname,
                     "branch": self.branch.name,
                     **self._ht["tags"],
                 }
@@ -1213,17 +1213,17 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
                 for step in range(len(value)):
                     mlflow.log_metric(f"evals_{key}", value[step], step=step)

-            # The Rest of the metrics are tracked when calling _get_score
-            mlflow.log_metric("time_fit", self.time_fit)
+            # The rest of the metrics are tracked when calling _get_score
+            mlflow.log_metric("time_fit", self._time_fit)

             mlflow.sklearn.log_model(
                 sk_model=self.estimator,
                 artifact_path=self._est_class.__name__,
                 signature=infer_signature(
                     model_input=pd.DataFrame(self.X),
-                    model_output=self.estimator.predict(self.test.iloc[0]),
+                    model_output=self.estimator.predict(self.X_test.iloc[[0]]),
                 ),
-                input_example=pd.DataFrame(self.X.iloc[[0], :]),
+                input_example=pd.DataFrame(self.X.iloc[[0]]),
             )

             if self.log_data:
@@ -1239,9 +1239,9 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
                     artifact_path=f"{self._est_class.__name__}_pipeline",
                     signature=infer_signature(
                         model_input=pd.DataFrame(self.X),
-                        model_output=self.estimator.predict(self.test.iloc[0]),
+                        model_output=self.estimator.predict(self.X_test.iloc[[0]]),
                     ),
-                    input_example=pd.DataFrame(self.X.iloc[[0], :]),
+                    input_example=pd.DataFrame(self.X.iloc[[0]]),
                 )

     @composed(crash, method_to_log)
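
The `infer_signature` fixes above share one root cause: `DataFrame.iloc[0]` returns a 1-D `Series`, while `iloc[[0]]` keeps a 2-D one-row `DataFrame`, which is what `estimator.predict` expects. Switching `self.test` to `self.X_test` additionally drops the target column, so the model only sees features. A quick pandas illustration:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

row = df.iloc[0]    # Series, shape (2,): loses the 2-D table structure
one = df.iloc[[0]]  # DataFrame, shape (1, 2): safe input for predict
print(row.shape, one.shape)  # (2,) (1, 2)
```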
@@ -1303,7 +1303,7 @@ def bootstrapping(self, n_bootstrap: Int, reset: Bool = False):
             self._log(f"Evaluation --> {' '.join(out)}", 1)

         self._time_bootstrap += (dt.now() - t_init).total_seconds()
-        self._log(f"Time elapsed: {time_to_str(self.time_bootstrap)}", 1)
+        self._log(f"Time elapsed: {time_to_str(self._time_bootstrap)}", 1)

     # Utility properties =========================================== >>

@@ -1524,7 +1524,7 @@ def results(self) -> pd.Series:
                 data[f"{met.name}_{ds}"] = self._get_score(met, ds)
         data["time_fit"] = self._time_fit
         if self._bootstrap is not None:
-            for met in self._metic.keys():
+            for met in self._metric.keys():
                 data[f"{met}_bootstrap"] = self.bootstrap[met].mean()
             data["time_bootstrap"] = self._time_bootstrap
         data["time"] = data.get("time_ht", 0) + self._time_fit + self._time_bootstrap
@@ -2244,7 +2244,7 @@ def register(

         model = mlflow.register_model(
             model_uri=f"runs:/{self.run.info.run_id}/{self._est_class.__name__}",
-            name=name or self._fullname,
+            name=name or self.fullname,
             tags=self._ht["tags"] or None,
         )

@@ -2274,7 +2274,7 @@ def save_estimator(self, filename: str = "auto"):
         with open(filename, "wb") as f:
             pickle.dump(self.estimator, f)

-        self._log(f"{self._fullname} estimator successfully saved.", 1)
+        self._log(f"{self.fullname} estimator successfully saved.", 1)

     @composed(crash, method_to_log)
     def serve(self, method: str = "predict", host: str = "127.0.0.1", port: Int = 8000):
@@ -2346,7 +2346,7 @@ async def __call__(self, request: Request) -> str:
         server = ServeModel.bind(model=self.export_pipeline(verbose=0), method=method)
         serve.run(server, host=host, port=port)

-        self._log(f"Serving model {self._fullname} on {host}:{port}...", 1)
+        self._log(f"Serving model {self.fullname} on {host}:{port}...", 1)

     @composed(crash, method_to_log)
     def transform(
@@ -2488,7 +2488,7 @@ def assign_prediction_columns() -> list[str]:
             X, y = X

         if method != "score":
-            pred = self.memory.cache(getattr(self.estimator, method))(X)
+            pred = np.array(self.memory.cache(getattr(self.estimator, method))(X))

             if np.array(pred).ndim < 3:
                 data = to_pandas(
@@ -2497,19 +2497,19 @@ def assign_prediction_columns() -> list[str]:
                     name=self.target,
                     columns=assign_prediction_columns(),
                 )
-            elif self.task.startswith("multilabel"):
+            elif self.task is Task.multilabel_classification:
                 # Convert to (n_samples, n_targets)
                 data = bk.DataFrame(
                     data=np.array([d[:, 1] for d in pred]).T,
                     index=X.index,
                     columns=assign_prediction_columns(),
                 )
             else:
                 # Convert to (n_samples * n_classes, n_targets)
                 data = bk.DataFrame(
                     data=pred.reshape(-1, pred.shape[2]),
                     index=bk.MultiIndex.from_tuples(
-                        [(col, idx) for col in np.unique(self.y) for idx in X]
+                        [(col, idx) for col in np.unique(self.y) for idx in X.index]
                     ),
                     columns=assign_prediction_columns(),
                 )
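
The index fix in the last hunk matters because iterating a DataFrame yields its column labels, so `for idx in X` built (class, column) tuples; `X.index` yields the row labels the MultiIndex needs. A small pandas sketch:

```python
import numpy as np
import pandas as pd

X = pd.DataFrame({"f1": [0.1, 0.2], "f2": [1.0, 2.0]}, index=[10, 11])
classes = np.unique([0, 1, 0])

print(list(X))        # ['f1', 'f2'] -> column labels (the old, buggy tuples)
print(list(X.index))  # [10, 11]     -> row labels (the fix)

idx = pd.MultiIndex.from_tuples([(c, i) for c in classes for i in X.index])
print(idx.tolist())   # [(0, 10), (0, 11), (1, 10), (1, 11)]
```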
6 changes: 3 additions & 3 deletions atom/baserunner.py
@@ -177,7 +177,7 @@ def winners(self) -> list[Model] | None:
         """
         if self._models:  # Returns None if not fitted
             return sorted(
-                self._models, key=lambda x: (x._best_score(), x.time_fit), reverse=True
+                self._models, key=lambda x: (x._best_score(), x._time_fit), reverse=True
             )

     @property
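
For reference, Python compares tuple sort keys element-wise, so `_best_score()` decides the order and `_time_fit` only breaks ties; `reverse=True` flips both components. A tiny sketch:

```python
# (name, score, fit time): equal scores fall back to the time component.
models = [("A", 0.9, 10.0), ("B", 0.9, 2.0), ("C", 0.8, 1.0)]
ranked = sorted(models, key=lambda m: (m[1], m[2]), reverse=True)
print([m[0] for m in ranked])  # ['A', 'B', 'C']
```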
@@ -402,7 +402,7 @@ def available_models(self) -> pd.DataFrame:
             rows.append(
                 {
                     "acronym": m.acronym,
-                    "model": m._fullname,
+                    "model": m.fullname,
                     "estimator": m._est_class.__name__,
                     "module": m._est_class.__module__.split(".")[0] + m._module,
                     "needs_scaling": m.needs_scaling,
@@ -790,7 +790,7 @@ def stacking(
         if self._goal not in model._estimators:
             raise ValueError(
                 "Invalid value for the final_estimator parameter. Model "
-                f"{model._fullname} can not perform {self.task} tasks."
+                f"{model.fullname} can not perform {self.task} tasks."
             )

         kwargs["final_estimator"] = model._get_est()
10 changes: 5 additions & 5 deletions atom/basetrainer.py
@@ -417,15 +417,15 @@ def execute_model(m: Model) -> Model | None:

         maxlen = 0
         names, scores = [], []
-        for model in self._models:
+        for m in self._models:
             # Add the model name for repeated model classes
-            if len(list(filter(lambda x: x.acronym == model.acronym, self._models))) > 1:
-                names.append(f"{model._fullname} ({model.name})")
+            if len(list(filter(lambda x: x.acronym == m.acronym, self._models))) > 1:
+                names.append(f"{m.fullname} ({m.name})")
             else:
-                names.append(model._fullname)
+                names.append(m.fullname)

             try:
-                scores.append(model._best_score())
+                scores.append(m._best_score())
             except (ValueError, AttributeError):  # Fails when errors="keep"
                 scores.append(-np.inf)
23 changes: 10 additions & 13 deletions atom/feature_engineering.py
@@ -34,7 +34,6 @@
 from atom.basetransformer import BaseTransformer
 from atom.data_cleaning import Scaler, TransformerMixin
 from atom.models import MODELS
-from atom.plots import FeatureSelectionPlot
 from atom.utils.types import (
     Backend, Bool, DataFrame, Engine, Features, FeatureSelectionSolvers,
     FeatureSelectionStrats, FloatLargerEqualZero, FloatLargerZero,
@@ -776,17 +775,13 @@ def transform(self, X: Features, y: Target | None = None) -> DataFrame:
         return X


-class FeatureSelector(
-    BaseEstimator,
-    TransformerMixin,
-    FeatureSelectionPlot,
-):
+class FeatureSelector(BaseEstimator, TransformerMixin, BaseTransformer):
     """Reduce the number of features in the data.

     Apply feature selection or dimensionality reduction, either to
     improve the estimators' accuracy or to boost their performance on
     very high-dimensional datasets. Additionally, remove multicollinear
-    and low variance features.
+    and low-variance features.

     This class can be accessed from atom through the [feature_selection]
     [atomclassifier-feature_selection] method. Read more in the
@@ -810,10 +805,14 @@ class FeatureSelector(
     with sparse matrices.

     !!! tip
-        Use the [plot_feature_importance][] method to examine how much
-        a specific feature contributes to the final predictions. If the
-        model doesn't have a `feature_importances_` attribute, use
-        [plot_permutation_importance][] instead.
+        * Use the [plot_pca][] and [plot_components][] methods to
+          examine the results after using strategy="pca".
+        * Use the [plot_rfecv][] method to examine the results after
+          using strategy="rfecv".
+        * Use the [plot_feature_importance][] method to examine how
+          much a specific feature contributes to the final predictions.
+          If the model doesn't have a `feature_importances_` attribute,
+          use [plot_permutation_importance][] instead.

     Parameters
     ----------
@@ -1027,8 +1026,6 @@ class FeatureSelector(
     # Note that the column names changed
     print(atom.dataset)
-    atom.plot_pca()
     ```

 === "stand-alone"
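
The "stand-alone" tab is truncated in this view. As a rough illustration only (not the repository's own snippet; it assumes the documented `strategy` and `n_features` parameters), the class can also be used outside atom:

```python
from atom.feature_engineering import FeatureSelector
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True, as_frame=True)

fs = FeatureSelector(strategy="pca", n_features=5, verbose=2)
X_pca = fs.fit_transform(X)
print(X_pca.shape)  # (569, 5)
```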
6 changes: 3 additions & 3 deletions atom/models/custom.py
@@ -29,15 +29,15 @@ def __init__(self, **kwargs):
         from atom.models import MODELS

         # If no name is provided, use the name of the class
-        name = self._fullname
+        name = self.fullname
         if len(n := list(filter(str.isupper, name))) >= 2 and n not in MODELS:
             name = "".join(n)

         self.acronym = getattr(est, "acronym", name)
         if not name.startswith(self.acronym):
             raise ValueError(
                 f"The name ({name}) and acronym ({self.acronym}) of model "
-                f"{self._fullname} do not match. The name should start with "
+                f"{self.fullname} do not match. The name should start with "
                 f"the model's acronym."
             )

@@ -49,7 +49,7 @@ def __init__(self, **kwargs):
         super().__init__(name=name, **kwargs)

     @property
-    def _fullname(self) -> str:
+    def fullname(self) -> str:
         """Return the estimator's class name."""
         return self._est_class.__name__

2 changes: 1 addition & 1 deletion atom/models/ensembles.py
@@ -110,7 +110,7 @@ def __init__(self, models: ClassMap, **kwargs):
                 raise ValueError(
                     "Invalid value for the voting parameter. If "
                     "'soft', all models in the ensemble should have "
-                    f"a predict_proba method, got {m._fullname}."
+                    f"a predict_proba method, got {m.fullname}."
                 )

     def _get_est(self, **params) -> Predictor:
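
The check mirrors scikit-learn's own soft-voting requirement: every base estimator must implement `predict_proba`. A sketch of the same constraint with plain scikit-learn (not ATOM's ensemble wrapper):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

X, y = make_classification(random_state=0)

vote = VotingClassifier(
    estimators=[("lr", LogisticRegression()), ("svc", SVC())],
    voting="soft",
)
# SVC(probability=False) exposes no usable predict_proba, so soft
# voting fails with an AttributeError once probabilities are needed.
try:
    vote.fit(X, y).predict(X)
except AttributeError as e:
    print(e)
```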
10 changes: 1 addition & 9 deletions atom/plots/__init__.py
@@ -10,20 +10,12 @@
 from abc import ABC

 from atom.plots.dataplot import DataPlot
-from atom.plots.featureselectionplot import FeatureSelectionPlot
 from atom.plots.hyperparametertuningplot import HyperparameterTuningPlot
 from atom.plots.predictionplot import PredictionPlot
 from atom.plots.shapplot import ShapPlot


-class ATOMPlot(
-    FeatureSelectionPlot,
-    DataPlot,
-    HyperparameterTuningPlot,
-    PredictionPlot,
-    ShapPlot,
-    ABC,
-):
+class ATOMPlot(DataPlot, HyperparameterTuningPlot, PredictionPlot, ShapPlot, ABC):
     """Plot classes inherited by main ATOM classes."""
     pass
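
Flattening the base-class list onto one line is cosmetic; the method resolution order still follows the left-to-right base order, minus the removed FeatureSelectionPlot. A quick check:

```python
from abc import ABC

class DataPlot: ...
class HyperparameterTuningPlot: ...
class PredictionPlot: ...
class ShapPlot: ...

class ATOMPlot(DataPlot, HyperparameterTuningPlot, PredictionPlot, ShapPlot, ABC):
    pass

print([c.__name__ for c in ATOMPlot.__mro__])
# ['ATOMPlot', 'DataPlot', 'HyperparameterTuningPlot', 'PredictionPlot',
#  'ShapPlot', 'ABC', 'object']
```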
